commit 7d085468536a58c2ea1959e6e76939aae1f6944b Author: gabest Date: Sat Dec 8 04:50:48 2007 +0000 diff --git a/GSdx_vs2005.sln b/GSdx_vs2005.sln new file mode 100644 index 0000000..4fef422 --- /dev/null +++ b/GSdx_vs2005.sln @@ -0,0 +1,59 @@ + +Microsoft Visual Studio Solution File, Format Version 9.00 +# Visual Studio 2005 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx", "gsdx\GSdx_vs2005.vcproj", "{18E42F6F-3A62-41EE-B42F-79366C4F1E95}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx10", "gsdx10\GSdx10_vs2005.vcproj", "{345C9F24-0B9A-4289-B375-ADD3B63461B7}" + ProjectSection(ProjectDependencies) = postProject + {18E42F6F-3A62-41EE-B42F-79366C4F1E95} = {18E42F6F-3A62-41EE-B42F-79366C4F1E95} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug SSE2|Win32 = Debug SSE2|Win32 + Debug SSE2|x64 = Debug SSE2|x64 + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release SSE2|Win32 = Release SSE2|Win32 + Release SSE2|x64 = Release SSE2|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.Build.0 = Debug SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.Build.0 = Debug|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32 + 
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.Build.0 = Release SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.Build.0 = Release|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.Build.0 = Debug SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.ActiveCfg = Debug|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.Build.0 = Debug|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.ActiveCfg = Debug|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.Build.0 = Debug|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.Build.0 = Release SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.ActiveCfg = Release SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.Build.0 = Release SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.ActiveCfg = Release|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.Build.0 = Release|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.ActiveCfg = Release|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/GSdx_vs2008.sln b/GSdx_vs2008.sln new file mode 100644 index 0000000..c193102 --- 
/dev/null +++ b/GSdx_vs2008.sln @@ -0,0 +1,59 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx", "gsdx\GSdx_vs2008.vcproj", "{18E42F6F-3A62-41EE-B42F-79366C4F1E95}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx10", "gsdx10\GSdx10_vs2008.vcproj", "{345C9F24-0B9A-4289-B375-ADD3B63461B7}" + ProjectSection(ProjectDependencies) = postProject + {18E42F6F-3A62-41EE-B42F-79366C4F1E95} = {18E42F6F-3A62-41EE-B42F-79366C4F1E95} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug SSE2|Win32 = Debug SSE2|Win32 + Debug SSE2|x64 = Debug SSE2|x64 + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release SSE2|Win32 = Release SSE2|Win32 + Release SSE2|x64 = Release SSE2|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.Build.0 = Debug SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.Build.0 = Debug|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.Build.0 = Release SSE2|x64 + 
{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64 + {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.Build.0 = Release|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.Build.0 = Debug SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.ActiveCfg = Debug|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.Build.0 = Debug|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.ActiveCfg = Debug|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.Build.0 = Debug|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.Build.0 = Release SSE2|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.ActiveCfg = Release SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.Build.0 = Release SSE2|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.ActiveCfg = Release|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.Build.0 = Release|Win32 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.ActiveCfg = Release|x64 + {345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/common.vsprops b/common.vsprops new file mode 100644 index 0000000..8b4a228 --- /dev/null +++ b/common.vsprops @@ -0,0 +1,25 @@ + + + + + diff --git a/debug.vsprops b/debug.vsprops new file mode 100644 index 0000000..9d5c60b --- /dev/null +++ 
b/debug.vsprops @@ -0,0 +1,19 @@ + + + + + diff --git a/gsdx/GS.h b/gsdx/GS.h new file mode 100644 index 0000000..5e2dab3 --- /dev/null +++ b/gsdx/GS.h @@ -0,0 +1,958 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * Special Notes: + * + * Register definitions and most of the enums originate from sps2dev-0.4.0 + * Copyright (C) 2002 Terratron Technologies Inc. All Rights Reserved. 
+ * + */ + +#pragma once + +// + +#pragma pack(push, 1) + +// +// sps2registers.h +// + +enum GS_REG +{ + GS_PMODE = 0x12000000, + GS_SMODE1 = 0x12000010, + GS_SMODE2 = 0x12000020, + GS_SRFSH = 0x12000030, + GS_SYNCH1 = 0x12000040, + GS_SYNCH2 = 0x12000050, + GS_SYNCV = 0x12000060, + GS_DISPFB1 = 0x12000070, + GS_DISPLAY1 = 0x12000080, + GS_DISPFB2 = 0x12000090, + GS_DISPLAY2 = 0x120000a0, + GS_EXTBUF = 0x120000b0, + GS_EXTDATA = 0x120000c0, + GS_EXTWRITE = 0x120000d0, + GS_BGCOLOR = 0x120000e0, + GS_UNKNOWN = 0x12000400, + GS_CSR = 0x12001000, + GS_IMR = 0x12001010, + GS_BUSDIR = 0x12001040, + GS_SIGLBLID = 0x12001080 +}; + +enum GS_PRIM +{ + GS_POINTLIST = 0, + GS_LINELIST = 1, + GS_LINESTRIP = 2, + GS_TRIANGLELIST = 3, + GS_TRIANGLESTRIP = 4, + GS_TRIANGLEFAN = 5, + GS_SPRITE = 6, + GS_INVALID = 7, +}; + +enum GIF_REG +{ + GIF_REG_PRIM = 0x00, + GIF_REG_RGBA = 0x01, + GIF_REG_STQ = 0x02, + GIF_REG_UV = 0x03, + GIF_REG_XYZF2 = 0x04, + GIF_REG_XYZ2 = 0x05, + GIF_REG_TEX0_1 = 0x06, + GIF_REG_TEX0_2 = 0x07, + GIF_REG_CLAMP_1 = 0x08, + GIF_REG_CLAMP_2 = 0x09, + GIF_REG_FOG = 0x0a, + GIF_REG_XYZF3 = 0x0c, + GIF_REG_XYZ3 = 0x0d, + GIF_REG_A_D = 0x0e, + GIF_REG_NOP = 0x0f, +}; + +enum GIF_A_D_REG +{ + GIF_A_D_REG_PRIM = 0x00, + GIF_A_D_REG_RGBAQ = 0x01, + GIF_A_D_REG_ST = 0x02, + GIF_A_D_REG_UV = 0x03, + GIF_A_D_REG_XYZF2 = 0x04, + GIF_A_D_REG_XYZ2 = 0x05, + GIF_A_D_REG_TEX0_1 = 0x06, + GIF_A_D_REG_TEX0_2 = 0x07, + GIF_A_D_REG_CLAMP_1 = 0x08, + GIF_A_D_REG_CLAMP_2 = 0x09, + GIF_A_D_REG_FOG = 0x0a, + GIF_A_D_REG_XYZF3 = 0x0c, + GIF_A_D_REG_XYZ3 = 0x0d, + GIF_A_D_REG_NOP = 0x0f, + GIF_A_D_REG_TEX1_1 = 0x14, + GIF_A_D_REG_TEX1_2 = 0x15, + GIF_A_D_REG_TEX2_1 = 0x16, + GIF_A_D_REG_TEX2_2 = 0x17, + GIF_A_D_REG_XYOFFSET_1 = 0x18, + GIF_A_D_REG_XYOFFSET_2 = 0x19, + GIF_A_D_REG_PRMODECONT = 0x1a, + GIF_A_D_REG_PRMODE = 0x1b, + GIF_A_D_REG_TEXCLUT = 0x1c, + GIF_A_D_REG_SCANMSK = 0x22, + GIF_A_D_REG_MIPTBP1_1 = 0x34, + GIF_A_D_REG_MIPTBP1_2 = 0x35, + GIF_A_D_REG_MIPTBP2_1 = 0x36, 
+ GIF_A_D_REG_MIPTBP2_2 = 0x37, + GIF_A_D_REG_TEXA = 0x3b, + GIF_A_D_REG_FOGCOL = 0x3d, + GIF_A_D_REG_TEXFLUSH = 0x3f, + GIF_A_D_REG_SCISSOR_1 = 0x40, + GIF_A_D_REG_SCISSOR_2 = 0x41, + GIF_A_D_REG_ALPHA_1 = 0x42, + GIF_A_D_REG_ALPHA_2 = 0x43, + GIF_A_D_REG_DIMX = 0x44, + GIF_A_D_REG_DTHE = 0x45, + GIF_A_D_REG_COLCLAMP = 0x46, + GIF_A_D_REG_TEST_1 = 0x47, + GIF_A_D_REG_TEST_2 = 0x48, + GIF_A_D_REG_PABE = 0x49, + GIF_A_D_REG_FBA_1 = 0x4a, + GIF_A_D_REG_FBA_2 = 0x4b, + GIF_A_D_REG_FRAME_1 = 0x4c, + GIF_A_D_REG_FRAME_2 = 0x4d, + GIF_A_D_REG_ZBUF_1 = 0x4e, + GIF_A_D_REG_ZBUF_2 = 0x4f, + GIF_A_D_REG_BITBLTBUF = 0x50, + GIF_A_D_REG_TRXPOS = 0x51, + GIF_A_D_REG_TRXREG = 0x52, + GIF_A_D_REG_TRXDIR = 0x53, + GIF_A_D_REG_HWREG = 0x54, + GIF_A_D_REG_SIGNAL = 0x60, + GIF_A_D_REG_FINISH = 0x61, + GIF_A_D_REG_LABEL = 0x62, +}; + +enum GIF_FLG +{ + GIF_FLG_PACKED = 0, + GIF_FLG_REGLIST = 1, + GIF_FLG_IMAGE = 2, + GIF_FLG_IMAGE2 = 3 +}; + +enum PSM +{ + PSM_PSMCT32 = 0, // 0000-0000 + PSM_PSMCT24 = 1, // 0000-0001 + PSM_PSMCT16 = 2, // 0000-0010 + PSM_PSMCT16S = 10, // 0000-1010 + PSM_PSMT8 = 19, // 0001-0011 + PSM_PSMT4 = 20, // 0001-0100 + PSM_PSMT8H = 27, // 0001-1011 + PSM_PSMT4HL = 36, // 0010-0100 + PSM_PSMT4HH = 44, // 0010-1100 + PSM_PSMZ32 = 48, // 0011-0000 + PSM_PSMZ24 = 49, // 0011-0001 + PSM_PSMZ16 = 50, // 0011-0010 + PSM_PSMZ16S = 58, // 0011-1010 +}; + +// +// sps2regstructs.h +// + +#define REG64(name) \ +union name \ +{ \ + UINT64 i64; \ + UINT32 ai32[2]; \ + struct { \ + +#define REG128(name)\ +union name \ +{ \ + UINT64 ai64[2]; \ + UINT32 ai32[4]; \ + struct { \ + +#define REG64_(prefix, name) REG64(prefix##name) +#define REG128_(prefix, name) REG128(prefix##name) + +#define REG_END }; }; +#define REG_END2 }; + +#define REG64_SET(name) \ +union name \ +{ \ + UINT64 i64; \ + UINT32 ai32[2]; \ + +#define REG128_SET(name)\ +union name \ +{ \ + __m128i ai128; \ + UINT64 ai64[2]; \ + UINT32 ai32[4]; \ + +#define REG_SET_END }; + +REG64_(GSReg, BGCOLOR) + UINT32 R:8; 
+ UINT32 G:8; + UINT32 B:8; + UINT32 _PAD1:8; + UINT32 _PAD2:32; +REG_END + +REG64_(GSReg, BUSDIR) + UINT32 DIR:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GSReg, CSR) + UINT32 rSIGNAL:1; + UINT32 rFINISH:1; + UINT32 rHSINT:1; + UINT32 rVSINT:1; + UINT32 rEDWINT:1; + UINT32 rZERO1:1; + UINT32 rZERO2:1; + UINT32 r_PAD1:1; + UINT32 rFLUSH:1; + UINT32 rRESET:1; + UINT32 r_PAD2:2; + UINT32 rNFIELD:1; + UINT32 rFIELD:1; + UINT32 rFIFO:2; + UINT32 rREV:8; + UINT32 rID:8; + UINT32 wSIGNAL:1; + UINT32 wFINISH:1; + UINT32 wHSINT:1; + UINT32 wVSINT:1; + UINT32 wEDWINT:1; + UINT32 wZERO1:1; + UINT32 wZERO2:1; + UINT32 w_PAD1:1; + UINT32 wFLUSH:1; + UINT32 wRESET:1; + UINT32 w_PAD2:2; + UINT32 wNFIELD:1; + UINT32 wFIELD:1; + UINT32 wFIFO:2; + UINT32 wREV:8; + UINT32 wID:8; +REG_END + +REG64_(GSReg, DISPFB) // (-1/2) + UINT32 FBP:9; + UINT32 FBW:6; + UINT32 PSM:5; + UINT32 _PAD:12; + UINT32 DBX:11; + UINT32 DBY:11; + UINT32 _PAD2:10; +REG_END2 + UINT32 Block() {return FBP<<5;} +REG_END2 + +REG64_(GSReg, DISPLAY) // (-1/2) + UINT32 DX:12; + UINT32 DY:11; + UINT32 MAGH:4; + UINT32 MAGV:2; + UINT32 _PAD:3; + UINT32 DW:12; + UINT32 DH:11; + UINT32 _PAD2:9; +REG_END + +REG64_(GSReg, EXTBUF) + UINT32 EXBP:14; + UINT32 EXBW:6; + UINT32 FBIN:2; + UINT32 WFFMD:1; + UINT32 EMODA:2; + UINT32 EMODC:2; + UINT32 _PAD1:5; + UINT32 WDX:11; + UINT32 WDY:11; + UINT32 _PAD2:10; +REG_END + +REG64_(GSReg, EXTDATA) + UINT32 SX:12; + UINT32 SY:11; + UINT32 SMPH:4; + UINT32 SMPV:2; + UINT32 _PAD1:3; + UINT32 WW:12; + UINT32 WH:11; + UINT32 _PAD2:9; +REG_END + +REG64_(GSReg, EXTWRITE) + UINT32 WRITE:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GSReg, IMR) + UINT32 _PAD1:8; + UINT32 SIGMSK:1; + UINT32 FINISHMSK:1; + UINT32 HSMSK:1; + UINT32 VSMSK:1; + UINT32 EDWMSK:1; + UINT32 _PAD2:19; + UINT32 _PAD3:32; +REG_END + +REG64_(GSReg, PMODE) + UINT32 EN1:1; + UINT32 EN2:1; + UINT32 CRTMD:3; + UINT32 MMOD:1; + UINT32 AMOD:1; + UINT32 SLBG:1; + UINT32 ALP:8; + UINT32 _PAD:16; + 
UINT32 _PAD1:32; +REG_END + +REG64_(GSReg, SIGLBLID) + UINT32 SIGID:32; + UINT32 LBLID:32; +REG_END + +REG64_(GSReg, SMODE1) + UINT32 RC:3; + UINT32 LC:7; + UINT32 T1248:2; + UINT32 SLCK:1; + UINT32 CMOD:2; + UINT32 EX:1; + UINT32 PRST:1; + UINT32 SINT:1; + UINT32 XPCK:1; + UINT32 PCK2:2; + UINT32 SPML:4; + UINT32 GCONT:1; + UINT32 PHS:1; + UINT32 PVS:1; + UINT32 PEHS:1; + UINT32 PEVS:1; + UINT32 CLKSEL:2; + UINT32 NVCK:1; + UINT32 SLCK2:1; + UINT32 VCKSEL:2; + UINT32 VHP:1; + UINT32 _PAD1:27; +REG_END + +REG64_(GSReg, SMODE2) + UINT32 INT:1; + UINT32 FFMD:1; + UINT32 DPMS:2; + UINT32 _PAD2:28; + UINT32 _PAD3:32; +REG_END + +REG64_SET(GSReg) + GSRegBGCOLOR BGCOLOR; + GSRegBUSDIR BUSDIR; + GSRegCSR CSR; + GSRegDISPFB DISPFB; + GSRegDISPLAY DISPLAY; + GSRegEXTBUF EXTBUF; + GSRegEXTDATA EXTDATA; + GSRegEXTWRITE EXTWRITE; + GSRegIMR IMR; + GSRegPMODE PMODE; + GSRegSIGLBLID SIGLBLID; + GSRegSMODE1 SMODE1; + GSRegSMODE2 SMODE2; +REG_SET_END + +// +// sps2tags.h +// + +#define SET_GIF_REG(gifTag, iRegNo, uiValue) \ + {((GIFTag*)&gifTag)->ai64[1] |= (((uiValue) & 0xf) << ((iRegNo) << 2));} + +#ifdef _M_AMD64 +#define GET_GIF_REG(tag, reg) \ + (((tag).ai64[1] >> ((reg) << 2)) & 0xf) +#else +#define GET_GIF_REG(tag, reg) \ + (((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf) +#endif + +// +// GIFTag + +REG128(GIFTag) + UINT32 NLOOP:15; + UINT32 EOP:1; + UINT32 _PAD1:16; + UINT32 _PAD2:14; + UINT32 PRE:1; + UINT32 PRIM:11; + UINT32 FLG:2; // enum GIF_FLG + UINT32 NREG:4; + UINT64 REGS:64; +REG_END + +// GIFReg + +REG64_(GIFReg, ALPHA) + UINT32 A:2; + UINT32 B:2; + UINT32 C:2; + UINT32 D:2; + UINT32 _PAD1:24; + UINT32 FIX:8; + UINT32 _PAD2:24; +REG_END + +REG64_(GIFReg, BITBLTBUF) + UINT32 SBP:14; + UINT32 _PAD1:2; + UINT32 SBW:6; + UINT32 _PAD2:2; + UINT32 SPSM:6; + UINT32 _PAD3:2; + UINT32 DBP:14; + UINT32 _PAD4:2; + UINT32 DBW:6; + UINT32 _PAD5:2; + UINT32 DPSM:6; + UINT32 _PAD6:2; +REG_END + +REG64_(GIFReg, CLAMP) + UINT64 WMS:2; + UINT64 WMT:2; + UINT64 MINU:10; 
+ UINT64 MAXU:10; + UINT64 MINV:10; + UINT64 MAXV:10; + UINT64 _PAD:20; +REG_END + +REG64_(GIFReg, COLCLAMP) + UINT32 CLAMP:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, DIMX) + UINT32 DM00:3; + UINT32 _PAD00:1; + UINT32 DM01:3; + UINT32 _PAD01:1; + UINT32 DM02:3; + UINT32 _PAD02:1; + UINT32 DM03:3; + UINT32 _PAD03:1; + + UINT32 DM10:3; + UINT32 _PAD10:1; + UINT32 DM11:3; + UINT32 _PAD11:1; + UINT32 DM12:3; + UINT32 _PAD12:1; + UINT32 DM13:3; + UINT32 _PAD13:1; + + UINT32 DM20:3; + UINT32 _PAD20:1; + UINT32 DM21:3; + UINT32 _PAD21:1; + UINT32 DM22:3; + UINT32 _PAD22:1; + UINT32 DM23:3; + UINT32 _PAD23:1; + + UINT32 DM30:3; + UINT32 _PAD30:1; + UINT32 DM31:3; + UINT32 _PAD31:1; + UINT32 DM32:3; + UINT32 _PAD32:1; + UINT32 DM33:3; + UINT32 _PAD33:1; +REG_END + +REG64_(GIFReg, DTHE) + UINT32 DTHE:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, FBA) + UINT32 FBA:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, FINISH) + UINT32 _PAD1:32; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, FOG) + UINT32 _PAD1:32; + UINT32 _PAD2:24; + UINT32 F:8; +REG_END + +REG64_(GIFReg, FOGCOL) + UINT32 FCR:8; + UINT32 FCG:8; + UINT32 FCB:8; + UINT32 _PAD1:8; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, FRAME) + UINT32 FBP:9; + UINT32 _PAD1:7; + UINT32 FBW:6; + UINT32 _PAD2:2; + UINT32 PSM:6; + UINT32 _PAD3:2; + UINT32 FBMSK:32; +REG_END2 + UINT32 Block() {return FBP<<5;} +REG_END2 + +REG64_(GIFReg, HWREG) + UINT32 DATA_LOWER:32; + UINT32 DATA_UPPER:32; +REG_END + +REG64_(GIFReg, LABEL) + UINT32 ID:32; + UINT32 IDMSK:32; +REG_END + +REG64_(GIFReg, MIPTBP1) + UINT64 TBP1:14; + UINT64 TBW1:6; + UINT64 TBP2:14; + UINT64 TBW2:6; + UINT64 TBP3:14; + UINT64 TBW3:6; + UINT64 _PAD:4; +REG_END + +REG64_(GIFReg, MIPTBP2) + UINT64 TBP4:14; + UINT64 TBW4:6; + UINT64 TBP5:14; + UINT64 TBW5:6; + UINT64 TBP6:14; + UINT64 TBW6:6; + UINT64 _PAD:4; +REG_END + +REG64_(GIFReg, NOP) + UINT32 _PAD1:32; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, 
PABE) + UINT32 PABE:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, PRIM) + UINT32 PRIM:3; + UINT32 IIP:1; + UINT32 TME:1; + UINT32 FGE:1; + UINT32 ABE:1; + UINT32 AA1:1; + UINT32 FST:1; + UINT32 CTXT:1; + UINT32 FIX:1; + UINT32 _PAD1:21; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, PRMODE) + UINT32 _PRIM:3; + UINT32 IIP:1; + UINT32 TME:1; + UINT32 FGE:1; + UINT32 ABE:1; + UINT32 AA1:1; + UINT32 FST:1; + UINT32 CTXT:1; + UINT32 FIX:1; + UINT32 _PAD2:21; + UINT32 _PAD3:32; +REG_END + +REG64_(GIFReg, PRMODECONT) + UINT32 AC:1; + UINT32 _PAD1:31; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, RGBAQ) + UINT32 R:8; + UINT32 G:8; + UINT32 B:8; + UINT32 A:8; + float Q; +REG_END + +REG64_(GIFReg, SCANMSK) + UINT32 MSK:2; + UINT32 _PAD1:30; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, SCISSOR) + UINT32 SCAX0:11; + UINT32 _PAD1:5; + UINT32 SCAX1:11; + UINT32 _PAD2:5; + UINT32 SCAY0:11; + UINT32 _PAD3:5; + UINT32 SCAY1:11; + UINT32 _PAD4:5; +REG_END + +REG64_(GIFReg, SIGNAL) + UINT32 ID:32; + UINT32 IDMSK:32; +REG_END + +REG64_(GIFReg, ST) + float S; + float T; +REG_END + +REG64_(GIFReg, TEST) + UINT32 ATE:1; + UINT32 ATST:3; + UINT32 AREF:8; + UINT32 AFAIL:2; + UINT32 DATE:1; + UINT32 DATM:1; + UINT32 ZTE:1; + UINT32 ZTST:2; + UINT32 _PAD1:13; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, TEX0) + UINT64 TBP0:14; + UINT64 TBW:6; + UINT64 PSM:6; + UINT64 TW:4; + UINT64 TH:4; + UINT64 TCC:1; + UINT64 TFX:2; + UINT64 CBP:14; + UINT64 CPSM:4; + UINT64 CSM:1; + UINT64 CSA:5; + UINT64 CLD:3; +REG_END + +REG64_(GIFReg, TEX1) + UINT32 LCM:1; + UINT32 _PAD1:1; + UINT32 MXL:3; + UINT32 MMAG:1; + UINT32 MMIN:3; + UINT32 MTBA:1; + UINT32 _PAD2:9; + UINT32 L:2; + UINT32 _PAD3:11; + UINT32 K:12; + UINT32 _PAD4:20; +REG_END + +REG64_(GIFReg, TEX2) + UINT32 _PAD1:20; + UINT32 PSM:6; + UINT32 _PAD2:6; + UINT32 _PAD3:5; + UINT32 CBP:14; + UINT32 CPSM:4; + UINT32 CSM:1; + UINT32 CSA:5; + UINT32 CLD:3; +REG_END + +REG64_(GIFReg, TEXA) + UINT32 TA0:8; + UINT32 _PAD1:7; + 
UINT32 AEM:1; + UINT32 _PAD2:16; + UINT32 TA1:8; + UINT32 _PAD3:24; +REG_END + +REG64_(GIFReg, TEXCLUT) + UINT32 CBW:6; + UINT32 COU:6; + UINT32 COV:10; + UINT32 _PAD1:10; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, TEXFLUSH) + UINT32 _PAD1:32; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, TRXDIR) + UINT32 XDIR:2; + UINT32 _PAD1:30; + UINT32 _PAD2:32; +REG_END + +REG64_(GIFReg, TRXPOS) + UINT32 SSAX:11; + UINT32 _PAD1:5; + UINT32 SSAY:11; + UINT32 _PAD2:5; + UINT32 DSAX:11; + UINT32 _PAD3:5; + UINT32 DSAY:11; + UINT32 DIR:2; + UINT32 _PAD4:3; +REG_END + +REG64_(GIFReg, TRXREG) + UINT32 RRW:12; + UINT32 _PAD1:20; + UINT32 RRH:12; + UINT32 _PAD2:20; +REG_END + +REG64_(GIFReg, UV) + UINT32 U:14; + UINT32 _PAD1:2; + UINT32 V:14; + UINT32 _PAD2:2; + UINT32 _PAD3:32; +REG_END + +REG64_(GIFReg, XYOFFSET) + UINT32 OFX:16; + UINT32 _PAD1:16; + UINT32 OFY:16; + UINT32 _PAD2:16; +REG_END + +REG64_(GIFReg, XYZ) + UINT32 X:16; + UINT32 Y:16; + UINT32 Z:32; +REG_END + +REG64_(GIFReg, XYZF) + UINT32 X:16; + UINT32 Y:16; + UINT32 Z:24; + UINT32 F:8; +REG_END + +REG64_(GIFReg, ZBUF) + UINT32 ZBP:9; + UINT32 _PAD1:15; + // UINT32 PSM:4; + // UINT32 _PAD2:4; + UINT32 PSM:6; + UINT32 _PAD2:2; + UINT32 ZMSK:1; + UINT32 _PAD3:31; +REG_END2 + UINT32 Block() {return ZBP<<5;} +REG_END2 + +REG64_SET(GIFReg) + GIFRegALPHA ALPHA; + GIFRegBITBLTBUF BITBLTBUF; + GIFRegCLAMP CLAMP; + GIFRegCOLCLAMP COLCLAMP; + GIFRegDIMX DIMX; + GIFRegDTHE DTHE; + GIFRegFBA FBA; + GIFRegFINISH FINISH; + GIFRegFOG FOG; + GIFRegFOGCOL FOGCOL; + GIFRegFRAME FRAME; + GIFRegHWREG HWREG; + GIFRegLABEL LABEL; + GIFRegMIPTBP1 MIPTBP1; + GIFRegMIPTBP2 MIPTBP2; + GIFRegNOP NOP; + GIFRegPABE PABE; + GIFRegPRIM PRIM; + GIFRegPRMODE PRMODE; + GIFRegPRMODECONT PRMODECONT; + GIFRegRGBAQ RGBAQ; + GIFRegSCANMSK SCANMSK; + GIFRegSCISSOR SCISSOR; + GIFRegSIGNAL SIGNAL; + GIFRegST ST; + GIFRegTEST TEST; + GIFRegTEX0 TEX0; + GIFRegTEX1 TEX1; + GIFRegTEX2 TEX2; + GIFRegTEXA TEXA; + GIFRegTEXCLUT TEXCLUT; + GIFRegTEXFLUSH TEXFLUSH; 
+ GIFRegTRXDIR TRXDIR; + GIFRegTRXPOS TRXPOS; + GIFRegTRXREG TRXREG; + GIFRegUV UV; + GIFRegXYOFFSET XYOFFSET; + GIFRegXYZ XYZ; + GIFRegXYZF XYZF; + GIFRegZBUF ZBUF; +REG_SET_END + +// GIFPacked + +REG128_(GIFPacked, PRIM) + UINT32 PRIM:11; + UINT32 _PAD1:21; + UINT32 _PAD2:32; + UINT32 _PAD3:32; + UINT32 _PAD4:32; +REG_END + +REG128_(GIFPacked, RGBA) + UINT32 R:8; + UINT32 _PAD1:24; + UINT32 G:8; + UINT32 _PAD2:24; + UINT32 B:8; + UINT32 _PAD3:24; + UINT32 A:8; + UINT32 _PAD4:24; +REG_END + +REG128_(GIFPacked, STQ) + float S; + float T; + float Q; + UINT32 _PAD1:32; +REG_END + +REG128_(GIFPacked, UV) + UINT32 U:14; + UINT32 _PAD1:18; + UINT32 V:14; + UINT32 _PAD2:18; + UINT32 _PAD3:32; + UINT32 _PAD4:32; +REG_END + +REG128_(GIFPacked, XYZF2) + UINT32 X:16; + UINT32 _PAD1:16; + UINT32 Y:16; + UINT32 _PAD2:16; + UINT32 _PAD3:4; + UINT32 Z:24; + UINT32 _PAD4:4; + UINT32 _PAD5:4; + UINT32 F:8; + UINT32 _PAD6:3; + UINT32 ADC:1; + UINT32 _PAD7:16; +REG_END + +REG128_(GIFPacked, XYZ2) + UINT32 X:16; + UINT32 _PAD1:16; + UINT32 Y:16; + UINT32 _PAD2:16; + UINT32 Z:32; + UINT32 _PAD3:15; + UINT32 ADC:1; + UINT32 _PAD4:16; +REG_END + +REG128_(GIFPacked, FOG) + UINT32 _PAD1:32; + UINT32 _PAD2:32; + UINT32 _PAD3:32; + UINT32 _PAD4:4; + UINT32 F:8; + UINT32 _PAD5:20; +REG_END + +REG128_(GIFPacked, A_D) + UINT64 DATA:64; + UINT32 ADDR:8; // enum GIF_A_D_REG + UINT32 _PAD1:24; + UINT32 _PAD2:32; +REG_END + +REG128_(GIFPacked, NOP) + UINT32 _PAD1:32; + UINT32 _PAD2:32; + UINT32 _PAD3:32; + UINT32 _PAD4:32; +REG_END + +REG128_SET(GIFPackedReg) + GIFReg r; + GIFPackedPRIM PRIM; + GIFPackedRGBA RGBA; + GIFPackedSTQ STQ; + GIFPackedUV UV; + GIFPackedXYZF2 XYZF2; + GIFPackedXYZ2 XYZ2; + GIFPackedFOG FOG; + GIFPackedA_D A_D; + GIFPackedNOP NOP; +REG_SET_END + +struct GIFPath +{ + GIFTag tag; + int nreg; + + DWORD GetGIFReg() {return (DWORD)GET_GIF_REG(tag, nreg);} +}; + +#pragma pack(pop) + +enum {KEYPRESS=1, KEYRELEASE=2}; +struct keyEvent {UINT32 key, event;}; + +enum {FREEZE_LOAD=0, 
FREEZE_SAVE=1, FREEZE_SIZE=2}; +struct freezeData {int size; BYTE* data;}; + +enum stateType {ST_WRITE, ST_TRANSFER, ST_VSYNC}; diff --git a/gsdx/GSDirtyRect.cpp b/gsdx/GSDirtyRect.cpp new file mode 100644 index 0000000..8f9480a --- /dev/null +++ b/gsdx/GSDirtyRect.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GSDirtyRect.h" + +/* Default constructor: m_psm starts as PSM_PSMCT32 and m_rect as the all-zero rectangle. */ GSDirtyRect::GSDirtyRect() + : m_psm(PSM_PSMCT32) + , m_rect(0, 0, 0, 0) +{ +} + +/* Stores psm and rect unchanged in m_psm and m_rect. */ GSDirtyRect::GSDirtyRect(DWORD psm, CRect rect) +{ + m_psm = psm; + m_rect = rect; +} + +/* Returns this rect for a texture that uses TEX0.PSM. Same format: left/top are rounded down and right/bottom rounded up to multiples of GSLocalMemory::m_psm[m_psm].bs (the masks presume bs.cx and bs.cy are powers of two - TODO confirm). */ CRect GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) +{ + CRect r = m_rect; + + CSize src = GSLocalMemory::m_psm[m_psm].bs; + + r.left = (r.left) & ~(src.cx-1); + r.right = (r.right + (src.cx-1) /* + 1 */) & ~(src.cx-1); + r.top = (r.top) & ~(src.cy-1); + r.bottom = (r.bottom + (src.cy-1) /* + 1 */) & ~(src.cy-1); + + /* Different format: all four edges are recomputed from the unaligned m_rect, scaled by dst.bs over src.bs with MulDiv, replacing the aligned values above. NOTE(review): confirm that skipping the alignment on this path is intended. */ if(m_psm != TEX0.PSM) + { + CSize dst = GSLocalMemory::m_psm[TEX0.PSM].bs; + + r.left = MulDiv(m_rect.left, dst.cx, src.cx); + r.right = MulDiv(m_rect.right, dst.cx, src.cx); + r.top = MulDiv(m_rect.top, dst.cy, src.cy); + r.bottom = MulDiv(m_rect.bottom, dst.cy, src.cy); + } + + return r; +} + +// + +/* Returns CRect(0, 0, 0, 0) when the list is empty; otherwise starts from CRect(INT_MAX, INT_MAX, 0, 0) and merges each entry's GetDirtyRect(TEX0) into it with operator |=. */ CRect GSDirtyRectList::GetDirtyRect(const GIFRegTEX0& TEX0) +{ + if(IsEmpty()) return CRect(0, 0, 0, 0); + CRect r(INT_MAX, INT_MAX, 0, 0); + POSITION pos = GetHeadPosition(); + while(pos) r |= GetNext(pos).GetDirtyRect(TEX0); + return r; +} + +/* +GSDirtyRectList::GSDirtyRectList() + : m_rects(NULL) + , m_count(0) + , m_maxcount(0) +{ +} + +GSDirtyRectList::~GSDirtyRectList() +{ + delete [] m_rects; +} + +void GSDirtyRectList::AddTail(const GSDirtyRect& r) +{ + if(m_count == m_maxcount) + { + m_maxcount = max(m_count, 8) * 3/2; + + GSDirtyRect* rects = new GSDirtyRect[m_maxcount]; + + memcpy(rects, m_rects, m_count * sizeof(GSDirtyRect)); + + delete [] m_rects; + + m_rects = rects; + + } + + m_rects[m_count++] = r; +} + +CRect GSDirtyRectList::GetDirtyRect(const GIFRegTEX0& TEX0) +{ + if(m_count == 0) + { + return CRect(0, 0, 0, 0); + } + + CRect r(INT_MAX, INT_MAX, 0, 0); + + for(size_t i = 0; i < m_count; i++) + { + r |= m_rects[i].GetDirtyRect(TEX0); + } + + return r; +} +*/ \ No newline at end of file diff --git a/gsdx/GSDirtyRect.h b/gsdx/GSDirtyRect.h new file mode 100644 
index 0000000..70371b2 --- /dev/null +++ b/gsdx/GSDirtyRect.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSLocalMemory.h" + +/* Pairs a CRect (m_rect) with a psm value (m_psm). GetDirtyRect(TEX0) returns the rectangle for the format named by TEX0.PSM; it is defined in GSDirtyRect.cpp. */ class GSDirtyRect +{ + DWORD m_psm; + CRect m_rect; + +public: + GSDirtyRect(); + GSDirtyRect(DWORD psm, CRect rect); + CRect GetDirtyRect(const GIFRegTEX0& TEX0); +}; + +/* ATL list of GSDirtyRect entries with a GetDirtyRect(TEX0) that combines them. The element type is spelled out because CAtlList is a class template and cannot serve as a base class without its argument. */ class GSDirtyRectList : public CAtlList<GSDirtyRect> +{ +public: + GSDirtyRectList() {} + CRect GetDirtyRect(const GIFRegTEX0& TEX0); +}; \ No newline at end of file diff --git a/gsdx/GSDrawingContext.h b/gsdx/GSDrawingContext.h new file mode 100644 index 0000000..8a3f44f --- /dev/null +++ b/gsdx/GSDrawingContext.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GS.h" +#include "GSLocalMemory.h" + +#pragma pack(push, 1) + +/* Drawing-context state: one copy of each register declared below, three GSLocalMemory::psm_t pointers (ftbl, ztbl, ttbl) and a float scissor box that UpdateScissor() fills in. Declared under #pragma pack(push, 1). */ struct GSDrawingContext +{ + /* Zero-fills the whole object. Written without the former 'struct' prefix so that it is a standard constructor declaration. */ GSDrawingContext() {memset(this, 0, sizeof(*this));} + + GIFRegXYOFFSET XYOFFSET; + GIFRegTEX0 TEX0; + GIFRegTEX1 TEX1; + GIFRegTEX2 TEX2; + GIFRegCLAMP CLAMP; + GIFRegMIPTBP1 MIPTBP1; + GIFRegMIPTBP2 MIPTBP2; + GIFRegSCISSOR SCISSOR; + GIFRegALPHA ALPHA; + GIFRegTEST TEST; + GIFRegFBA FBA; + GIFRegFRAME FRAME; + GIFRegZBUF ZBUF; + + GSLocalMemory::psm_t* ftbl; + GSLocalMemory::psm_t* ztbl; + GSLocalMemory::psm_t* ttbl; + + struct {float x0, y0, x1, y1;} scissor; + + /* Recomputes scissor.x0/y0/x1/y1: each SCISSOR coordinate is shifted left by 4, added to the matching XYOFFSET component and stored as float. NOTE(review): the << 4 presumably brings SCISSOR to the same fixed-point scale as XYOFFSET - confirm. */ void UpdateScissor() + { + scissor.x0 = (float)(int)((int)(SCISSOR.SCAX0 << 4) + (int)XYOFFSET.OFX); + scissor.y0 = (float)(int)((int)(SCISSOR.SCAY0 << 4) + (int)XYOFFSET.OFY); + scissor.x1 = (float)(int)((int)(SCISSOR.SCAX1 << 4) + (int)XYOFFSET.OFX); + scissor.y1 = (float)(int)((int)(SCISSOR.SCAY1 << 4) + (int)XYOFFSET.OFY); + } +}; + +#pragma pack(pop) \ No newline at end of file diff --git a/gsdx/GSDrawingEnvironment.h b/gsdx/GSDrawingEnvironment.h new file mode 100644 index 0000000..e963bb4 --- /dev/null +++ b/gsdx/GSDrawingEnvironment.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GS.h" + +#pragma pack(push, 1) + +struct GSDrawingEnvironment +{ + struct GSDrawingEnvironment() {memset(this, 0, sizeof(*this));} + + GIFRegPRIM PRIM; + GIFRegPRMODE PRMODE; + GIFRegPRMODECONT PRMODECONT; + GIFRegTEXCLUT TEXCLUT; + GIFRegSCANMSK SCANMSK; + GIFRegTEXA TEXA; + GIFRegFOGCOL FOGCOL; + GIFRegDIMX DIMX; + GIFRegDTHE DTHE; + GIFRegCOLCLAMP COLCLAMP; + GIFRegPABE PABE; + GIFRegBITBLTBUF BITBLTBUF; + GIFRegTRXDIR TRXDIR; + GIFRegTRXPOS TRXPOS; + GIFRegTRXREG TRXREG; + GIFRegTRXREG TRXREG2; + GSDrawingContext CTXT[2]; +}; + +#pragma pack(pop) diff --git a/gsdx/GSLocalMemory.cpp b/gsdx/GSLocalMemory.cpp new file mode 100644 index 0000000..053e3c6 --- /dev/null +++ b/gsdx/GSLocalMemory.cpp @@ -0,0 +1,1937 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * Special Notes: + * + * Based on Page.c from GSSoft + * Copyright (C) 2002-2004 GSsoft Team + * + */ + +#include "StdAfx.h" +#include "GSLocalMemory.h" +#include "x86.h" + +#define ASSERT_BLOCK(r, w, h) \ + ASSERT((r).Width() >= w && (r).Height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \ + +#if defined(_M_AMD64) || _M_IX86_FP >= 2 +#define BLOCK_PREFETCH(mem) \ + _mm_prefetch(&mem[16*0], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*2], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*4], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*6], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*8], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*10], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*12], _MM_HINT_T0); \ + _mm_prefetch(&mem[16*14], _MM_HINT_T0); \ + +#define BLOCK_PREFETCH_32(x, y, w) {const char* next = (const char*)&m_vm32[blockAddress32(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);} +#define BLOCK_PREFETCH_16(x, y, w) {const char* next = (const char*)&m_vm16[blockAddress16(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);} +#define BLOCK_PREFETCH_16S(x, y, w) {const char* next = (const char*)&m_vm16[blockAddress16S(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);} +#define BLOCK_PREFETCH_8(x, y, w) {const char* next = (const char*)&m_vm8[blockAddress8(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);} +#define BLOCK_PREFETCH_4(x, y, w) {const char* next = (const char*)&m_vm8[blockAddress4(x + (w), y, TEX0.TBP0, TEX0.TBW)>>1]; BLOCK_PREFETCH(next);} +#else +#define BLOCK_PREFETCH_32(x, y, w) +#define BLOCK_PREFETCH_16(x, y, w) +#define BLOCK_PREFETCH_16S(x, y, w) +#define BLOCK_PREFETCH_8(x, y, w) +#define BLOCK_PREFETCH_4(x, y, w) +#endif + +#define FOREACH_BLOCK_START(r, w, h, t) \ + for(int y = (r).top; y < (r).bottom; y += (h)) \ + { ASSERT_BLOCK(r, w, h); \ + BYTE* ptr = dst + (y-(r).top)*dstpitch; \ + for(int x = (r).left; x < (r).right; x += (w)) \ + { \ + 
BLOCK_PREFETCH_##t##(x + (w), y, w) \ + +#define FOREACH_BLOCK_END }} + +// + +DWORD GSLocalMemory::pageOffset32[32][32][64]; +DWORD GSLocalMemory::pageOffset32Z[32][32][64]; +DWORD GSLocalMemory::pageOffset16[32][64][64]; +DWORD GSLocalMemory::pageOffset16S[32][64][64]; +DWORD GSLocalMemory::pageOffset16Z[32][64][64]; +DWORD GSLocalMemory::pageOffset16SZ[32][64][64]; +DWORD GSLocalMemory::pageOffset8[32][64][128]; +DWORD GSLocalMemory::pageOffset4[32][128][128]; + +int GSLocalMemory::rowOffset32[2048]; +int GSLocalMemory::rowOffset32Z[2048]; +int GSLocalMemory::rowOffset16[2048]; +int GSLocalMemory::rowOffset16S[2048]; +int GSLocalMemory::rowOffset16Z[2048]; +int GSLocalMemory::rowOffset16SZ[2048]; +int GSLocalMemory::rowOffset8[2][2048]; +int GSLocalMemory::rowOffset4[2][2048]; + +// + +DWORD GSLocalMemory::m_xtbl[1024]; +DWORD GSLocalMemory::m_ytbl[1024]; + +// + +GSLocalMemory::psm_t GSLocalMemory::m_psm[64]; + +// + +GSLocalMemory::GSLocalMemory() + : m_fCLUTMayBeDirty(true) +{ + int len = 1024*1024*4*2; // *2 for safety... 
+ + m_vm8 = (BYTE*)_aligned_malloc(len, 16); + + memset(m_vm8, 0, len); + + m_pCLUT = (WORD*)_aligned_malloc(256*2*sizeof(WORD)*2, 16); + m_pCLUT32 = (DWORD*)_aligned_malloc(256*sizeof(DWORD), 16); + m_pCLUT64 = (UINT64*)_aligned_malloc(256*sizeof(UINT64), 16); + + for(int bp = 0; bp < 32; bp++) + { + for(int y = 0; y < 32; y++) for(int x = 0; x < 64; x++) + { + pageOffset32[bp][y][x] = pixelAddressOrg32(x, y, bp, 0); + pageOffset32Z[bp][y][x] = pixelAddressOrg32Z(x, y, bp, 0); + } + + for(int y = 0; y < 64; y++) for(int x = 0; x < 64; x++) + { + pageOffset16[bp][y][x] = pixelAddressOrg16(x, y, bp, 0); + pageOffset16S[bp][y][x] = pixelAddressOrg16S(x, y, bp, 0); + pageOffset16Z[bp][y][x] = pixelAddressOrg16Z(x, y, bp, 0); + pageOffset16SZ[bp][y][x] = pixelAddressOrg16SZ(x, y, bp, 0); + } + + for(int y = 0; y < 64; y++) for(int x = 0; x < 128; x++) + { + pageOffset8[bp][y][x] = pixelAddressOrg8(x, y, bp, 0); + } + + for(int y = 0; y < 128; y++) for(int x = 0; x < 128; x++) + { + pageOffset4[bp][y][x] = pixelAddressOrg4(x, y, bp, 0); + } + } + + { + for(int x = 0; x < countof(rowOffset32); x++) + rowOffset32[x] = (int)pixelAddress32(x, 0, 0, 32) - (int)pixelAddress32(0, 0, 0, 32); + + for(int x = 0; x < countof(rowOffset32Z); x++) + rowOffset32Z[x] = (int)pixelAddress32Z(x, 0, 0, 32) - (int)pixelAddress32Z(0, 0, 0, 32); + + for(int x = 0; x < countof(rowOffset16); x++) + rowOffset16[x] = (int)pixelAddress16(x, 0, 0, 32) - (int)pixelAddress16(0, 0, 0, 32); + + for(int x = 0; x < countof(rowOffset16S); x++) + rowOffset16S[x] = (int)pixelAddress16S(x, 0, 0, 32) - (int)pixelAddress16S(0, 0, 0, 32); + + for(int x = 0; x < countof(rowOffset16Z); x++) + rowOffset16Z[x] = (int)pixelAddress16Z(x, 0, 0, 32) - (int)pixelAddress16Z(0, 0, 0, 32); + + for(int x = 0; x < countof(rowOffset16SZ); x++) + rowOffset16SZ[x] = (int)pixelAddress16SZ(x, 0, 0, 32) - (int)pixelAddress16SZ(0, 0, 0, 32); + + for(int x = 0; x < countof(rowOffset8[0]); x++) + rowOffset8[0][x] = 
(int)pixelAddress8(x, 0, 0, 32) - (int)pixelAddress8(0, 0, 0, 32), + rowOffset8[1][x] = (int)pixelAddress8(x, 2, 0, 32) - (int)pixelAddress8(0, 2, 0, 32); + + for(int x = 0; x < countof(rowOffset4[0]); x++) + rowOffset4[0][x] = (int)pixelAddress4(x, 0, 0, 32) - (int)pixelAddress4(0, 0, 0, 32), + rowOffset4[1][x] = (int)pixelAddress4(x, 2, 0, 32) - (int)pixelAddress4(0, 2, 0, 32); + } + + for(int i = 0; i < countof(m_psm); i++) + { + m_psm[i].pa = &GSLocalMemory::pixelAddress32; + m_psm[i].ba = &GSLocalMemory::blockAddress32; + m_psm[i].pga = &GSLocalMemory::pageAddress32; + m_psm[i].rp = &GSLocalMemory::readPixel32; + m_psm[i].rpa = &GSLocalMemory::readPixel32; + m_psm[i].wp = &GSLocalMemory::writePixel32; + m_psm[i].wpa = &GSLocalMemory::writePixel32; + m_psm[i].rt = &GSLocalMemory::readTexel32; + m_psm[i].rtNP = &GSLocalMemory::readTexel32; + m_psm[i].rtP = &GSLocalMemory::readTexel32; + m_psm[i].rta = &GSLocalMemory::readTexel32; + m_psm[i].wfa = &GSLocalMemory::writePixel32; + m_psm[i].st = &GSLocalMemory::SwizzleTexture32; + m_psm[i].ust = &GSLocalMemory::unSwizzleTexture32; + m_psm[i].ustP = &GSLocalMemory::unSwizzleTexture32; + m_psm[i].ustNP = &GSLocalMemory::unSwizzleTexture32; + m_psm[i].bpp = m_psm[i].trbpp = 32; + m_psm[i].pal = 0; + m_psm[i].bs = CSize(8, 8); + m_psm[i].pgs = CSize(64, 32); + for(int j = 0; j < 8; j++) m_psm[i].rowOffset[j] = rowOffset32; + } + + m_psm[PSM_PSMCT16].pa = &GSLocalMemory::pixelAddress16; + m_psm[PSM_PSMCT16S].pa = &GSLocalMemory::pixelAddress16S; + m_psm[PSM_PSMT8].pa = &GSLocalMemory::pixelAddress8; + m_psm[PSM_PSMT4].pa = &GSLocalMemory::pixelAddress4; + m_psm[PSM_PSMZ32].pa = &GSLocalMemory::pixelAddress32Z; + m_psm[PSM_PSMZ24].pa = &GSLocalMemory::pixelAddress32Z; + m_psm[PSM_PSMZ16].pa = &GSLocalMemory::pixelAddress16Z; + m_psm[PSM_PSMZ16S].pa = &GSLocalMemory::pixelAddress16SZ; + + m_psm[PSM_PSMCT16].ba = &GSLocalMemory::blockAddress16; + m_psm[PSM_PSMCT16S].ba = &GSLocalMemory::blockAddress16S; + 
m_psm[PSM_PSMT8].ba = &GSLocalMemory::blockAddress8; + m_psm[PSM_PSMT4].ba = &GSLocalMemory::blockAddress4; + m_psm[PSM_PSMZ32].ba = &GSLocalMemory::blockAddress32Z; + m_psm[PSM_PSMZ24].ba = &GSLocalMemory::blockAddress32Z; + m_psm[PSM_PSMZ16].ba = &GSLocalMemory::blockAddress16Z; + m_psm[PSM_PSMZ16S].ba = &GSLocalMemory::blockAddress16SZ; + + m_psm[PSM_PSMCT16].pga = &GSLocalMemory::pageAddress16; + m_psm[PSM_PSMCT16S].pga = &GSLocalMemory::pageAddress16; + m_psm[PSM_PSMZ16].pga = &GSLocalMemory::pageAddress16; + m_psm[PSM_PSMZ16S].pga = &GSLocalMemory::pageAddress16; + m_psm[PSM_PSMT8].pga = &GSLocalMemory::pageAddress8; + m_psm[PSM_PSMT4].pga = &GSLocalMemory::pageAddress4; + + m_psm[PSM_PSMCT24].rp = &GSLocalMemory::readPixel24; + m_psm[PSM_PSMCT16].rp = &GSLocalMemory::readPixel16; + m_psm[PSM_PSMCT16S].rp = &GSLocalMemory::readPixel16S; + m_psm[PSM_PSMT8].rp = &GSLocalMemory::readPixel8; + m_psm[PSM_PSMT4].rp = &GSLocalMemory::readPixel4; + m_psm[PSM_PSMT8H].rp = &GSLocalMemory::readPixel8H; + m_psm[PSM_PSMT4HL].rp = &GSLocalMemory::readPixel4HL; + m_psm[PSM_PSMT4HH].rp = &GSLocalMemory::readPixel4HH; + m_psm[PSM_PSMZ32].rp = &GSLocalMemory::readPixel32Z; + m_psm[PSM_PSMZ24].rp = &GSLocalMemory::readPixel24Z; + m_psm[PSM_PSMZ16].rp = &GSLocalMemory::readPixel16Z; + m_psm[PSM_PSMZ16S].rp = &GSLocalMemory::readPixel16SZ; + + m_psm[PSM_PSMCT24].rpa = &GSLocalMemory::readPixel24; + m_psm[PSM_PSMCT16].rpa = &GSLocalMemory::readPixel16; + m_psm[PSM_PSMCT16S].rpa = &GSLocalMemory::readPixel16S; + m_psm[PSM_PSMT8].rpa = &GSLocalMemory::readPixel8; + m_psm[PSM_PSMT4].rpa = &GSLocalMemory::readPixel4; + m_psm[PSM_PSMT8H].rpa = &GSLocalMemory::readPixel8H; + m_psm[PSM_PSMT4HL].rpa = &GSLocalMemory::readPixel4HL; + m_psm[PSM_PSMT4HH].rpa = &GSLocalMemory::readPixel4HH; + m_psm[PSM_PSMZ32].rpa = &GSLocalMemory::readPixel32Z; + m_psm[PSM_PSMZ24].rpa = &GSLocalMemory::readPixel24Z; + m_psm[PSM_PSMZ16].rpa = &GSLocalMemory::readPixel16Z; + m_psm[PSM_PSMZ16S].rpa = 
&GSLocalMemory::readPixel16SZ; + + m_psm[PSM_PSMCT32].wp = &GSLocalMemory::writePixel32; + m_psm[PSM_PSMCT24].wp = &GSLocalMemory::writePixel24; + m_psm[PSM_PSMCT16].wp = &GSLocalMemory::writePixel16; + m_psm[PSM_PSMCT16S].wp = &GSLocalMemory::writePixel16S; + m_psm[PSM_PSMT8].wp = &GSLocalMemory::writePixel8; + m_psm[PSM_PSMT4].wp = &GSLocalMemory::writePixel4; + m_psm[PSM_PSMT8H].wp = &GSLocalMemory::writePixel8H; + m_psm[PSM_PSMT4HL].wp = &GSLocalMemory::writePixel4HL; + m_psm[PSM_PSMT4HH].wp = &GSLocalMemory::writePixel4HH; + m_psm[PSM_PSMZ32].wp = &GSLocalMemory::writePixel32Z; + m_psm[PSM_PSMZ24].wp = &GSLocalMemory::writePixel24Z; + m_psm[PSM_PSMZ16].wp = &GSLocalMemory::writePixel16Z; + m_psm[PSM_PSMZ16S].wp = &GSLocalMemory::writePixel16SZ; + + m_psm[PSM_PSMCT32].wpa = &GSLocalMemory::writePixel32; + m_psm[PSM_PSMCT24].wpa = &GSLocalMemory::writePixel24; + m_psm[PSM_PSMCT16].wpa = &GSLocalMemory::writePixel16; + m_psm[PSM_PSMCT16S].wpa = &GSLocalMemory::writePixel16S; + m_psm[PSM_PSMT8].wpa = &GSLocalMemory::writePixel8; + m_psm[PSM_PSMT4].wpa = &GSLocalMemory::writePixel4; + m_psm[PSM_PSMT8H].wpa = &GSLocalMemory::writePixel8H; + m_psm[PSM_PSMT4HL].wpa = &GSLocalMemory::writePixel4HL; + m_psm[PSM_PSMT4HH].wpa = &GSLocalMemory::writePixel4HH; + m_psm[PSM_PSMZ32].wpa = &GSLocalMemory::writePixel32Z; + m_psm[PSM_PSMZ24].wpa = &GSLocalMemory::writePixel24Z; + m_psm[PSM_PSMZ16].wpa = &GSLocalMemory::writePixel16Z; + m_psm[PSM_PSMZ16S].wpa = &GSLocalMemory::writePixel16SZ; + + m_psm[PSM_PSMCT24].rt = &GSLocalMemory::readTexel24; + m_psm[PSM_PSMCT16].rt = &GSLocalMemory::readTexel16; + m_psm[PSM_PSMCT16S].rt = &GSLocalMemory::readTexel16S; + m_psm[PSM_PSMT8].rt = &GSLocalMemory::readTexel8; + m_psm[PSM_PSMT4].rt = &GSLocalMemory::readTexel4; + m_psm[PSM_PSMT8H].rt = &GSLocalMemory::readTexel8H; + m_psm[PSM_PSMT4HL].rt = &GSLocalMemory::readTexel4HL; + m_psm[PSM_PSMT4HH].rt = &GSLocalMemory::readTexel4HH; + + m_psm[PSM_PSMCT24].rta = &GSLocalMemory::readTexel24; 
+ m_psm[PSM_PSMCT16].rta = &GSLocalMemory::readTexel16; + m_psm[PSM_PSMCT16S].rta = &GSLocalMemory::readTexel16S; + m_psm[PSM_PSMT8].rta = &GSLocalMemory::readTexel8; + m_psm[PSM_PSMT4].rta = &GSLocalMemory::readTexel4; + m_psm[PSM_PSMT8H].rta = &GSLocalMemory::readTexel8H; + m_psm[PSM_PSMT4HL].rta = &GSLocalMemory::readTexel4HL; + m_psm[PSM_PSMT4HH].rta = &GSLocalMemory::readTexel4HH; + + m_psm[PSM_PSMCT24].wfa = &GSLocalMemory::writePixel24; + m_psm[PSM_PSMCT16].wfa = &GSLocalMemory::writeFrame16; + m_psm[PSM_PSMCT16S].wfa = &GSLocalMemory::writeFrame16S; + + m_psm[PSM_PSMCT16].rtP = &GSLocalMemory::readTexel16P; + m_psm[PSM_PSMCT16S].rtP = &GSLocalMemory::readTexel16SP; + m_psm[PSM_PSMT8].rtP = &GSLocalMemory::readTexel8P; + m_psm[PSM_PSMT4].rtP = &GSLocalMemory::readTexel4P; + m_psm[PSM_PSMT8H].rtP = &GSLocalMemory::readTexel8HP; + m_psm[PSM_PSMT4HL].rtP = &GSLocalMemory::readTexel4HLP; + m_psm[PSM_PSMT4HH].rtP = &GSLocalMemory::readTexel4HHP; + + m_psm[PSM_PSMCT16].rtNP = &GSLocalMemory::readTexel16P; + m_psm[PSM_PSMCT16S].rtNP = &GSLocalMemory::readTexel16SP; + m_psm[PSM_PSMT8].rtNP = &GSLocalMemory::readTexel8; + m_psm[PSM_PSMT4].rtNP = &GSLocalMemory::readTexel4; + m_psm[PSM_PSMT8H].rtNP = &GSLocalMemory::readTexel8H; + m_psm[PSM_PSMT4HL].rtNP = &GSLocalMemory::readTexel4HL; + m_psm[PSM_PSMT4HH].rtNP = &GSLocalMemory::readTexel4HH; + + m_psm[PSM_PSMCT24].st = &GSLocalMemory::SwizzleTexture24; + m_psm[PSM_PSMCT16].st = &GSLocalMemory::SwizzleTexture16; + m_psm[PSM_PSMCT16S].st = &GSLocalMemory::SwizzleTexture16S; + m_psm[PSM_PSMT8].st = &GSLocalMemory::SwizzleTexture8; + m_psm[PSM_PSMT4].st = &GSLocalMemory::SwizzleTexture4; + m_psm[PSM_PSMT8H].st = &GSLocalMemory::SwizzleTexture8H; + m_psm[PSM_PSMT4HL].st = &GSLocalMemory::SwizzleTexture4HL; + m_psm[PSM_PSMT4HH].st = &GSLocalMemory::SwizzleTexture4HH; + + m_psm[PSM_PSMCT24].ust = &GSLocalMemory::unSwizzleTexture24; + m_psm[PSM_PSMCT16].ust = &GSLocalMemory::unSwizzleTexture16; + m_psm[PSM_PSMCT16S].ust = 
&GSLocalMemory::unSwizzleTexture16S; + m_psm[PSM_PSMT8].ust = &GSLocalMemory::unSwizzleTexture8; + m_psm[PSM_PSMT4].ust = &GSLocalMemory::unSwizzleTexture4; + m_psm[PSM_PSMT8H].ust = &GSLocalMemory::unSwizzleTexture8H; + m_psm[PSM_PSMT4HL].ust = &GSLocalMemory::unSwizzleTexture4HL; + m_psm[PSM_PSMT4HH].ust = &GSLocalMemory::unSwizzleTexture4HH; + + m_psm[PSM_PSMCT16].ustP = &GSLocalMemory::unSwizzleTexture16P; + m_psm[PSM_PSMCT16S].ustP = &GSLocalMemory::unSwizzleTexture16SP; + m_psm[PSM_PSMT8].ustP = &GSLocalMemory::unSwizzleTexture8P; + m_psm[PSM_PSMT4].ustP = &GSLocalMemory::unSwizzleTexture4P; + m_psm[PSM_PSMT8H].ustP = &GSLocalMemory::unSwizzleTexture8HP; + m_psm[PSM_PSMT4HL].ustP = &GSLocalMemory::unSwizzleTexture4HLP; + m_psm[PSM_PSMT4HH].ustP = &GSLocalMemory::unSwizzleTexture4HHP; + + m_psm[PSM_PSMCT16].ustNP = &GSLocalMemory::unSwizzleTexture16P; + m_psm[PSM_PSMCT16S].ustNP = &GSLocalMemory::unSwizzleTexture16SP; + m_psm[PSM_PSMT8].ustNP = &GSLocalMemory::unSwizzleTexture8NP; + m_psm[PSM_PSMT4].ustNP = &GSLocalMemory::unSwizzleTexture4NP; + m_psm[PSM_PSMT8H].ustNP = &GSLocalMemory::unSwizzleTexture8HNP; + m_psm[PSM_PSMT4HL].ustNP = &GSLocalMemory::unSwizzleTexture4HLNP; + m_psm[PSM_PSMT4HH].ustNP = &GSLocalMemory::unSwizzleTexture4HHNP; + + m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256; + m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16; + + m_psm[PSM_PSMCT16].bpp = m_psm[PSM_PSMCT16S].bpp = 16; + m_psm[PSM_PSMT8].bpp = 8; + m_psm[PSM_PSMT4].bpp = 4; + m_psm[PSM_PSMZ16].bpp = m_psm[PSM_PSMZ16S].bpp = 16; + + m_psm[PSM_PSMCT24].trbpp = 24; + m_psm[PSM_PSMCT16].trbpp = m_psm[PSM_PSMCT16S].trbpp = 16; + m_psm[PSM_PSMT8].trbpp = m_psm[PSM_PSMT8H].trbpp = 8; + m_psm[PSM_PSMT4].trbpp = m_psm[PSM_PSMT4HL].trbpp = m_psm[PSM_PSMT4HH].trbpp = 4; + m_psm[PSM_PSMZ24].trbpp = 24; + m_psm[PSM_PSMZ16].trbpp = m_psm[PSM_PSMZ16S].trbpp = 16; + + m_psm[PSM_PSMCT16].bs = m_psm[PSM_PSMCT16S].bs = CSize(16, 8); + m_psm[PSM_PSMT8].bs = CSize(16, 
16); + m_psm[PSM_PSMT4].bs = CSize(32, 32); + m_psm[PSM_PSMZ16].bs = m_psm[PSM_PSMZ16S].bs = CSize(16, 8); + + m_psm[PSM_PSMCT16].pgs = m_psm[PSM_PSMCT16S].pgs = CSize(64, 64); + m_psm[PSM_PSMT8].pgs = CSize(128, 64); + m_psm[PSM_PSMT4].pgs = CSize(128, 128); + m_psm[PSM_PSMZ16].pgs = m_psm[PSM_PSMZ16S].pgs = CSize(64, 64); + + for(int i = 0; i < 8; i++) m_psm[PSM_PSMCT16].rowOffset[i] = rowOffset16; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMCT16S].rowOffset[i] = rowOffset16S; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMT8].rowOffset[i] = rowOffset8[((i+2)>>2)&1]; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMT4].rowOffset[i] = rowOffset4[((i+2)>>2)&1]; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ32].rowOffset[i] = rowOffset32Z; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ24].rowOffset[i] = rowOffset32Z; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16].rowOffset[i] = rowOffset16Z; + for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16S].rowOffset[i] = rowOffset16SZ; +} + +GSLocalMemory::~GSLocalMemory() +{ + _aligned_free(m_vm8); + _aligned_free(m_pCLUT); + _aligned_free(m_pCLUT32); + _aligned_free(m_pCLUT64); +} + +//////////////////// + +bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD fbp, DWORD fbw) +{ + const psm_t& tbl = m_psm[psm]; + + writePixel wp = tbl.wp; + pixelAddress ba = tbl.ba; + + int w = tbl.bs.cx; + int h = tbl.bs.cy; + int bpp = tbl.bpp; + + int shift = 0; + + switch(bpp) + { + case 32: shift = 0; break; + case 16: shift = 1; c = (c&0xffff)*0x00010001; break; + case 8: shift = 2; c = (c&0xff)*0x01010101; break; + case 4: shift = 3; c = (c&0xf)*0x11111111; break; + } + + CRect clip((r.left+(w-1))&~(w-1), (r.top+(h-1))&~(h-1), r.right&~(w-1), r.bottom&~(h-1)); + + for(int y = r.top; y < clip.top; y++) + for(int x = r.left; x < r.right; x++) + (this->*wp)(x, y, c, fbp, fbw); + + for(int y = clip.top; y < clip.bottom; y += h) + { + for(int ys = y, ye = y + h; ys < ye; ys++) + { + for(int x = r.left; x < clip.left; x++) + (this->*wp)(x, ys, c, 
fbp, fbw); + for(int x = clip.right; x < r.right; x++) + (this->*wp)(x, ys, c, fbp, fbw); + } + } + + if(psm == PSM_PSMCT24 || psm == PSM_PSMZ24) + { + c &= 0x00ffffff; + + for(int y = clip.top; y < clip.bottom; y += h) + { + for(int x = clip.left; x < clip.right; x += w) + { + DWORD* p = &m_vm32[ba(x, y, fbp, fbw)]; + + for(int i = 0; i < 64; i++) + { + p[i] = (p[i] & 0xff000000) | c; + } + } + } + } + else + { + for(int y = clip.top; y < clip.bottom; y += h) + for(int x = clip.left; x < clip.right; x += w) + memsetd(&m_vm8[ba(x, y, fbp, fbw) << 2 >> shift], c, 64); + } + + for(int y = clip.bottom; y < r.bottom; y++) + for(int x = r.left; x < r.right; x++) + (this->*wp)(x, y, c, fbp, fbw); + + return(true); +} + +//////////////////// + +bool GSLocalMemory::IsCLUTDirty(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT) +{ + return m_fCLUTMayBeDirty || m_prevTEX0.i64 != TEX0.i64 || m_prevTEXCLUT.i64 != TEXCLUT.i64; +} + +bool GSLocalMemory::WriteCLUT(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT) +{ + switch(TEX0.CLD) + { + default: + case 0: return false; + case 1: break; + case 2: m_CBP[0] = TEX0.CBP; break; + case 3: m_CBP[1] = TEX0.CBP; break; + case 4: if(m_CBP[0] == TEX0.CBP) return false; + case 5: if(m_CBP[1] == TEX0.CBP) return false; + } + + if(!IsCLUTDirty(TEX0, TEXCLUT)) + { + return false; + } + + m_prevTEX0 = TEX0; + m_prevTEXCLUT = TEXCLUT; + + m_fCLUTMayBeDirty = false; + + DWORD bp = TEX0.CBP; + DWORD bw = TEX0.CSM == 0 ? 1 : TEXCLUT.CBW; + + WORD* pCLUT = m_pCLUT + (TEX0.CSA<<4); + + // NOTE: TEX0.CPSM == PSM_PSMCT24 is non-standard, KH uses it + + if(TEX0.CSM == 0) + { + if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) + { + WORD* vm = &m_vm16[TEX0.CPSM == PSM_PSMCT16 ? 
blockAddress16(0, 0, bp, bw) : blockAddress16S(0, 0, bp, bw)]; + + if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H) + { + WriteCLUT_T16_I8_CSM1(vm, pCLUT); + } + else if(TEX0.PSM == PSM_PSMT4HH || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4) + { + WriteCLUT_T16_I4_CSM1(vm, pCLUT); + } + } + else if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) + { + DWORD* vm = &m_vm32[blockAddress32(0, 0, bp, bw)]; + + if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H) + { + WriteCLUT_T32_I8_CSM1(vm, pCLUT); + } + else if(TEX0.PSM == PSM_PSMT4HH || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4) + { + WriteCLUT_T32_I4_CSM1(vm, pCLUT); + } + } + } + else + { + readPixel rp = m_psm[TEX0.CPSM].rp; + + int nPaletteEntries = m_psm[TEX0.PSM].pal; + + ASSERT(nPaletteEntries == 0 || TEX0.CPSM == PSM_PSMCT16); // this is the only allowed format for CSM2, but we implement all of them, just in case... + + if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) + { + for(int i = 0; i < nPaletteEntries; i++) + { + pCLUT[i] = (WORD)(this->*rp)((TEXCLUT.COU<<4) + i, TEXCLUT.COV, bp, bw); + } + } + else if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24) + { + for(int i = 0; i < nPaletteEntries; i++) + { + DWORD dw = (this->*rp)((TEXCLUT.COU<<4) + i, TEXCLUT.COV, bp, bw); + pCLUT[i] = (WORD)(dw & 0xffff); + pCLUT[i+256] = (WORD)(dw >> 16); + } + } + } + + return true; +} + +// + +void GSLocalMemory::ReadCLUT(GIFRegTEX0 TEX0, DWORD* pCLUT32) +{ + ASSERT(pCLUT32); + + WORD* pCLUT = m_pCLUT + (TEX0.CSA << 4); + + if(TEX0.CPSM == PSM_PSMCT32) + { + switch(TEX0.PSM) + { + case PSM_PSMT8: + case PSM_PSMT8H: + ReadCLUT32_T32_I8(pCLUT, pCLUT32); + break; + case PSM_PSMT4: + case PSM_PSMT4HL: + case PSM_PSMT4HH: + ReadCLUT32_T32_I4(pCLUT, pCLUT32); + break; + } + } + else if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) + { + switch(TEX0.PSM) + { + case PSM_PSMT8: + case PSM_PSMT8H: + ReadCLUT32_T16_I8(pCLUT, pCLUT32); + break; + case PSM_PSMT4: + case PSM_PSMT4HL: + 
case PSM_PSMT4HH: + ReadCLUT32_T16_I4(pCLUT, pCLUT32); + break; + } + } +} + +void GSLocalMemory::SetupCLUT(GIFRegTEX0 TEX0) +{ + // TODO: cache m_pCLUT* + + ReadCLUT(TEX0, m_pCLUT32); + + switch(TEX0.PSM) + { + case PSM_PSMT4: + case PSM_PSMT4HL: + case PSM_PSMT4HH: + // sse2? + if(TEX0.CPSM == PSM_PSMCT32) + { + for(int j = 0, k = 0; j < 16; j++) + for(int i = 0; i < 16; i++, k++) + m_pCLUT64[k] = ((UINT64)m_pCLUT32[j] << 32) | m_pCLUT32[i]; + } + else + { + for(int j = 0, k = 0; j < 16; j++) + for(int i = 0; i < 16; i++, k++) + m_pCLUT64[k] = ((UINT64)m_pCLUT32[j] << 16) | (m_pCLUT32[i] & 0xffff); + } + break; + } +} + +// + +void GSLocalMemory::ReadCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, DWORD* pCLUT32) +{ + ASSERT(pCLUT32); + + WORD* pCLUT = m_pCLUT + (TEX0.CSA << 4); + + if(TEX0.CPSM == PSM_PSMCT32) + { + switch(TEX0.PSM) + { + case PSM_PSMT8: + case PSM_PSMT8H: + ReadCLUT32_T32_I8(pCLUT, pCLUT32); + break; + case PSM_PSMT4: + case PSM_PSMT4HL: + case PSM_PSMT4HH: + ReadCLUT32_T32_I4(pCLUT, pCLUT32); + break; + } + } + else if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) + { + Expand16(pCLUT, pCLUT32, m_psm[TEX0.PSM].pal, &TEXA); + } +} + +void GSLocalMemory::SetupCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA) +{ + // TODO: cache m_pCLUT* + + ReadCLUT32(TEX0, TEXA, m_pCLUT32); + + switch(TEX0.PSM) + { + case PSM_PSMT4: + case PSM_PSMT4HL: + case PSM_PSMT4HH: + // sse2? 
+ for(int j = 0, k = 0; j < 16; j++) + for(int i = 0; i < 16; i++, k++) + m_pCLUT64[k] = ((UINT64)m_pCLUT32[j] << 32) | m_pCLUT32[i]; + break; + } +} + +void GSLocalMemory::CopyCLUT32(DWORD* pCLUT32, int nPaletteEntries) +{ + memcpy(pCLUT32, m_pCLUT32, sizeof(DWORD)*nPaletteEntries); +} + +//////////////////// + +static void SwizzleTextureStep(int& tx, int& ty, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ +// if(ty == TRXREG.RRH && tx == TRXPOS.DSAX) ASSERT(0); + + if(++tx == TRXREG.RRW) + { + tx = TRXPOS.DSAX; + ty++; + } +} + +#define IsTopLeftAligned(dsax, tx, ty, bw, bh) \ + (((dsax) & ((bw)-1)) == 0 && ((tx) & ((bw)-1)) == 0 && (dsax) == (tx) && ((ty) & ((bh)-1)) == 0) + +void GSLocalMemory::SwizzleTexture32(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*4; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); + + if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) + { + if(fTopLeftAligned && tw >= 8 && th >= 8) + { + int twa = tw & ~7; + int tha = th & ~7; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 8) + { + for(int x = tx; x < twa; x += 8) + SwizzleBlock32u((BYTE*)&m_vm32[blockAddress32(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch); + + for(int i = 0; i < 8; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + if(len > 0 && tw >= 8 && th >= 2 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 2)) + { + int twa = tw & ~7; + int tha = th & ~1; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 2) + { + for(int x = tx; x < twa; x += 8) + SwizzleColumn32(ty, (BYTE*)&m_vm32[blockAddress32(x, ty&~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch); + + for(int i = 0; i < 2; i++, 
ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + th += ty; + + if((DWORD_PTR)src & 0xf) + { + for(int y = ty; y < th; y += 8, src += srcpitch*8) + for(int x = tx; x < tw; x += 8) + SwizzleBlock32u((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch); + } + else + { + for(int y = ty; y < th; y += 8, src += srcpitch*8) + for(int x = tx; x < tw; x += 8) + SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch); + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture24(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*3; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); + + if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) + { + // TODO + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + __declspec(align(16)) DWORD block[8*8]; + + th += ty; + + for(int y = ty; y < th; y += 8, src += srcpitch*8) + { + for(int x = tx; x < tw; x += 8) + { + BYTE* s = src + (x - tx)*3; + DWORD* d = block; + + for(int j = 0, diff = srcpitch - 8*3; j < 8; j++, s += diff, d += 8) + for(int i = 0; i < 8; i++, s += 3) + d[i] = (s[2]<<16)|(s[1]<<8)|s[0]; + + SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0x00ffffff); + } + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture16(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*2; + int th = len / 
srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 8); + + if(!fTopLeftAligned || (tw & 15) || (th & 7) || (len % srcpitch)) + { + if(fTopLeftAligned && tw >= 16 && th >= 8) + { + int twa = tw & ~15; + int tha = th & ~7; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 8) + { + for(int x = tx; x < twa; x += 16) + SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + + for(int i = 0; i < 8; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + if(len > 0 && tw >= 16 && th >= 2 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 2)) + { + int twa = tw & ~15; + int tha = th & ~1; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 2) + { + for(int x = tx; x < twa; x += 16) + SwizzleColumn16(ty, (BYTE*)&m_vm16[blockAddress16(x, ty&~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + + for(int i = 0; i < 2; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + th += ty; + + if((DWORD_PTR)src & 0xf) + { + for(int y = ty; y < th; y += 8, src += srcpitch*8) + for(int x = tx; x < tw; x += 16) + SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + } + else + { + for(int y = ty; y < th; y += 8, src += srcpitch*8) + for(int x = tx; x < tw; x += 16) + SwizzleBlock16((BYTE*)&m_vm16[blockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture16S(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - 
TRXPOS.DSAX)*2; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 8); + + if(!fTopLeftAligned || (tw & 15) || (th & 7) || (len % srcpitch)) + { + if(fTopLeftAligned && tw >= 16 && th >= 8) + { + int twa = tw & ~15; + int tha = th & ~7; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 8) + { + for(int x = tx; x < twa; x += 16) + SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16S(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + + for(int i = 0; i < 8; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + if(len > 0 && tw >= 16 && th >= 2 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 2)) + { + int twa = tw & ~15; + int tha = th & ~1; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 2) + { + for(int x = tx; x < twa; x += 16) + SwizzleColumn16(ty, (BYTE*)&m_vm16[blockAddress16S(x, ty&~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + + for(int i = 0; i < 2; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + th += ty; + + if((DWORD_PTR)src & 0xf) + { + for(int y = ty; y < th; y += 8, src += srcpitch*8) + for(int x = tx; x < tw; x += 16) + SwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + } + else + { + for(int y = ty; y < th; y += 8, src += srcpitch*8) + for(int x = tx; x < tw; x += 16) + SwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch); + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture8(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = 
TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 16); + + if(!fTopLeftAligned || (tw & 15) || (th & 15) || (len % srcpitch)) + { + if(fTopLeftAligned && tw >= 16 && th >= 16) + { + int twa = tw & ~15; + int tha = th & ~15; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 16) + { + for(int x = tx; x < twa; x += 16) + SwizzleBlock8u((BYTE*)&m_vm8[blockAddress8(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch); + + for(int i = 0; i < 16; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + if(len > 0 && tw >= 16 && th >= 4 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 4)) + { + int twa = tw & ~15; + int tha = th & ~3; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 4) + { + for(int x = tx; x < twa; x += 16) + SwizzleColumn8(ty, (BYTE*)&m_vm8[blockAddress8(x, ty&~15, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch); + + for(int i = 0; i < 4; i++, ty++, src += srcpitch) + for(int x = twa; x < tw; x++) + writePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + th += ty; + + if((DWORD_PTR)src & 0xf) + { + for(int y = ty; y < th; y += 16, src += srcpitch*16) + for(int x = tx; x < tw; x += 16) + SwizzleBlock8u((BYTE*)&m_vm8[blockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch); + } + else + { + for(int y = ty; y < th; y += 16, src += srcpitch*16) + for(int x = tx; x < tw; x += 16) + SwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch); + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture8H(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = 
TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); + + if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) + { + // TODO + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + __declspec(align(16)) DWORD block[8*8]; + + th += ty; + + for(int y = ty; y < th; y += 8, src += srcpitch*8) + { + for(int x = tx; x < tw; x += 8) + { + BYTE* s = src + (x - tx); + DWORD* d = block; + + for(int j = 0; j < 8; j++, s += srcpitch, d += 8) + for(int i = 0; i < 8; i++) + d[i] = s[i] << 24; + + SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0xff000000); + } + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture4(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)/2; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 32, 16); + + if(!fTopLeftAligned || (tw & 31) || (th & 15) || (len % srcpitch)) + { + if(fTopLeftAligned && tw >= 32 && th >= 16) + { + int twa = tw & ~31; + int tha = th & ~15; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < tha; j += 16) + { + for(int x = tx; x < twa; x += 32) + SwizzleBlock4u((BYTE*)&m_vm8[blockAddress4(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1], src + (x - tx)/2, srcpitch); + + for(int i = 0; i < 16; i++, ty++, src += srcpitch) + { + BYTE* s = src + (twa - tx)/2; + + for(int x = twa; x < tw; x += 2, s++) + { + writePixel4(x, ty, *s&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW), + writePixel4(x+1, ty, *s>>4, BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + } + } + + if(len > 0 && tw >= 32 && th >= 4 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 32, 4)) + { + int twa = tw & ~31; + int tha = th & ~3; + + len -= tha * srcpitch; + th -= tha; + + for(int j = 0; j < 
tha; j += 4) + { + for(int x = tx; x < twa; x += 32) + SwizzleColumn4(ty, (BYTE*)&m_vm8[blockAddress4(x, ty&~15, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1], src + (x - tx)/2, srcpitch); + + for(int i = 0; i < 4; i++, ty++, src += srcpitch) + { + BYTE* s = src + (twa - tx)/2; + + for(int x = twa; x < tw; x += 2, s++) + { + writePixel4(x, ty, *s&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW), + writePixel4(x+1, ty, *s>>4, BITBLTBUF.DBP, BITBLTBUF.DBW); + } + } + } + } + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + th += ty; + + if((DWORD_PTR)src & 0xf) + { + for(int y = ty; y < th; y += 16, src += srcpitch*16) + for(int x = tx; x < tw; x += 32) + SwizzleBlock4u((BYTE*)&m_vm8[blockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1], src + (x - tx)/2, srcpitch); + } + else + { + for(int y = ty; y < th; y += 16, src += srcpitch*16) + for(int x = tx; x < tw; x += 32) + SwizzleBlock4((BYTE*)&m_vm8[blockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1], src + (x - tx)/2, srcpitch); + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)/2; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); + + if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) + { + // TODO + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + __declspec(align(16)) DWORD block[8*8]; + + th += ty; + + for(int y = ty; y < th; y += 8, src += srcpitch*8) + { + for(int x = tx; x < tw; x += 8) + { + BYTE* s = src + (x - tx)/2; + DWORD* d = block; + + for(int j = 0; j < 8; j++, s += srcpitch, d += 8) + for(int i = 0; i < 8/2; i++) + d[i*2] = (s[i]&0x0f) << 24, + d[i*2+1] = (s[i]&0xf0) << 20; + + SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, 
sizeof(block)/8, 0x0f000000); + } + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTexture4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(TRXREG.RRW == 0) return; + + int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)/2; + int th = len / srcpitch; + + bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); + + if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) + { + // TODO + + SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); + } + else + { + __declspec(align(16)) DWORD block[8*8]; + + th += ty; + + for(int y = ty; y < th; y += 8, src += srcpitch*8) + { + for(int x = tx; x < tw; x += 8) + { + BYTE* s = src + (x - tx)/2; + DWORD* d = block; + + for(int j = 0; j < 8; j++, s += srcpitch, d += 8) + for(int i = 0; i < 8/2; i++) + d[i*2] = (s[i]&0x0f) << 28, + d[i*2+1] = (s[i]&0xf0) << 24; + + SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0xf0000000); + } + } + + ty = th; + } +} + +void GSLocalMemory::SwizzleTextureX(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) +{ + if(len <= 0) return; + + BYTE* pb = (BYTE*)src; + WORD* pw = (WORD*)src; + DWORD* pd = (DWORD*)src; + + // if(ty >= (int)TRXREG.RRH) {ASSERT(0); return;} + + switch(BITBLTBUF.DPSM) + { + case PSM_PSMCT32: + for(len /= 4; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pd++) + writePixel32(tx, ty, *pd, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMCT24: + for(len /= 3; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb+=3) + writePixel24(tx, ty, *(DWORD*)pb, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMCT16: + for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++) + writePixel16(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMCT16S: + for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, 
TRXPOS, TRXREG), pw++) + writePixel16S(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMT8: + for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++) + writePixel8(tx, ty, *pb, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMT4: + for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++) + writePixel4(tx, ty, *pb&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW), + writePixel4(tx+1, ty, *pb>>4, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMT8H: + for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++) + writePixel8H(tx, ty, *pb, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMT4HL: + for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++) + writePixel4HL(tx, ty, *pb&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW), + writePixel4HL(tx+1, ty, *pb>>4, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMT4HH: + for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++) + writePixel4HH(tx, ty, *pb&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW), + writePixel4HH(tx+1, ty, *pb>>4, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMZ32: + for(len /= 4; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pd++) + writePixel32Z(tx, ty, *pd, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMZ24: + for(len /= 3; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb+=3) + writePixel24Z(tx, ty, *(DWORD*)pb, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMZ16: + for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++) + writePixel16Z(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + case PSM_PSMZ16S: + for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++) + writePixel16SZ(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW); + break; + } +} + +/////////////////// + +void GSLocalMemory::unSwizzleTexture32(const CRect& r, BYTE* dst, int dstpitch, 
GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left)*4, dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + ExpandBlock24(block, (DWORD*)ptr + (x-r.left), dstpitch, &TEXA); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 8, 16) + { + __declspec(align(16)) WORD block[16*8]; + unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + ExpandBlock16(block, (DWORD*)ptr + (x-r.left), dstpitch, &TEXA); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 8, 16S) + { + __declspec(align(16)) WORD block[16*8]; + unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + ExpandBlock16(block, (DWORD*)ptr + (x-r.left), dstpitch, &TEXA); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 16, 8) + { + __declspec(align(16)) BYTE block[16*16]; + unSwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/16); + + BYTE* s = block; + BYTE* d = ptr + (x-r.left)*4; + + for(int j = 0; j < 16; j++, s += 16, d += dstpitch) + for(int i = 0; i < 16; i++) + ((DWORD*)d)[i] = m_pCLUT32[s[i]]; + } + FOREACH_BLOCK_END +} + +void 
GSLocalMemory::unSwizzleTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + + DWORD* s = block; + BYTE* d = ptr + (x-r.left)*4; + + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)d)[i] = m_pCLUT32[s[i] >> 24]; + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 32, 16, 4) + { + __declspec(align(16)) BYTE block[(32/2)*16]; + unSwizzleBlock4((BYTE*)&m_vm8[blockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], (BYTE*)block, sizeof(block)/16); + + BYTE* s = block; + BYTE* d = ptr + (x-r.left)*4; + + for(int j = 0; j < 16; j++, s += 32/2, d += dstpitch) + for(int i = 0; i < 32/2; i++) + ((UINT64*)d)[i] = m_pCLUT64[s[i]]; + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + + DWORD* s = block; + BYTE* d = ptr + (x-r.left)*4; + + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)d)[i] = m_pCLUT32[(s[i] >> 24)&0x0f]; + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + + DWORD* s = block; + BYTE* d = ptr + (x-r.left)*4; + + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 
0; i < 8; i++) + ((DWORD*)d)[i] = m_pCLUT32[s[i] >> 28]; + } + FOREACH_BLOCK_END +} + +/////////////////// + +void GSLocalMemory::ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP) +{ + unSwizzleTexture st = m_psm[TEX0.PSM].ust; + readTexel rt = m_psm[TEX0.PSM].rt; + CSize bs = m_psm[TEX0.PSM].bs; + + if(r.Width() < bs.cx || r.Height() < bs.cy + || (r.left & (bs.cx-1)) || (r.top & (bs.cy-1)) + || (r.right & (bs.cx-1)) || (r.bottom & (bs.cy-1)) + || (CLAMP.WMS == 3) || (CLAMP.WMT == 3)) + { + ReadTexture(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st); + } + else + { + (this->*st)(r, dst, dstpitch, TEX0, TEXA); + } +} + +/////////////////// + +void GSLocalMemory::unSwizzleTexture16P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 8, 16) + { + unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left)*2, dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture16SP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 8, 16S) + { + unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left)*2, dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture8P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 16, 8) + { + unSwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture8HP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + unSwizzleBlock8HP((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, 
GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 32, 16, 4) + { + unSwizzleBlock4P((BYTE*)&m_vm8[blockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], ptr + (x-r.left), dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + unSwizzleBlock4HLP((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + unSwizzleBlock4HHP((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); + } + FOREACH_BLOCK_END +} +

///////////////////

// "P" flavour of ReadTexture: same dispatch, but through the ustP / rtP
// entries of m_psm[TEX0.PSM].
//
// The whole-block unswizzler is called directly when r consists of whole
// blocks and neither CLAMP.WMS nor CLAMP.WMT is 3; otherwise the generic
// ReadTexture<T> helper is used. T must match the size of one destination
// texel, and it has to be given explicitly because it appears in none of the
// helper's parameter types:
//
//   PSMCT32, PSMCT24 (and, after the assert, any unknown format) - DWORD
//   PSMCT16, PSMCT16S                      - WORD  (the 16P/16SP unswizzlers
//                                            step the destination by 2 bytes)
//   PSMT8, PSMT8H, PSMT4, PSMT4HL, PSMT4HH - BYTE  (the 8P/8HP/4P/4HLP/4HHP
//                                            unswizzlers step it by 1 byte)
//
// NOTE(review): which functions m_psm[].ustP / .rtP point to is set up
// outside this part of the file - confirm the texel sizes above against it.
void GSLocalMemory::ReadTextureP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
{
	unSwizzleTexture st = m_psm[TEX0.PSM].ustP;
	readTexel rt = m_psm[TEX0.PSM].rtP;
	CSize bs = m_psm[TEX0.PSM].bs;

	if(r.Width() < bs.cx || r.Height() < bs.cy
	|| (r.left & (bs.cx-1)) || (r.top & (bs.cy-1))
	|| (r.right & (bs.cx-1)) || (r.bottom & (bs.cy-1))
	|| (CLAMP.WMS == 3) || (CLAMP.WMT == 3))
	{
		switch(TEX0.PSM)
		{
		default:
			ASSERT(0);
			// fall through: unknown formats are read like 32-bit ones
		case PSM_PSMCT32:
		case PSM_PSMCT24:
			ReadTexture<DWORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
			break;
		case PSM_PSMCT16:
		case PSM_PSMCT16S:
			ReadTexture<WORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
			break;
		case PSM_PSMT8:
		case PSM_PSMT8H:
		case PSM_PSMT4:
		case PSM_PSMT4HL:
		case PSM_PSMT4HH:
			ReadTexture<BYTE>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
			break;
		}
	}
	else
	{
		// r is made of whole blocks only
		(this->*st)(r, dst, dstpitch, TEX0, TEXA);
	}
}

+/////////////////// + +void GSLocalMemory::unSwizzleTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 16, 16, 8) + { + __declspec(align(16)) BYTE block[16*16]; +
unSwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/16); + + BYTE* s = block; + + if(TEX0.CPSM == PSM_PSMCT32) + { + BYTE* d = ptr + (x-r.left)*4; + for(int j = 0; j < 16; j++, s += 16, d += dstpitch) + for(int i = 0; i < 16; i++) + ((DWORD*)d)[i] = m_pCLUT32[s[i]]; + } + else + { + ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S); + + BYTE* d = ptr + (x-r.left)*2; + for(int j = 0; j < 16; j++, s += 16, d += dstpitch) + for(int i = 0; i < 16; i++) + ((WORD*)d)[i] = (WORD)m_pCLUT32[s[i]]; + } + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + + DWORD* s = block; + + if(TEX0.CPSM == PSM_PSMCT32) + { + BYTE* d = ptr + (x-r.left)*4; + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)d)[i] = m_pCLUT32[s[i] >> 24]; + } + else + { + ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S); + + BYTE* d = ptr + (x-r.left)*2; + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((WORD*)d)[i] = (WORD)m_pCLUT32[s[i] >> 24]; + } + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 32, 16, 4) + { + __declspec(align(16)) BYTE block[(32/2)*16]; + unSwizzleBlock4((BYTE*)&m_vm8[blockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], (BYTE*)block, sizeof(block)/16); + + BYTE* s = block; + + if(TEX0.CPSM == PSM_PSMCT32) + { + BYTE* d = ptr + (x-r.left)*4; + + for(int j = 0; j < 16; j++, s += 32/2, d += dstpitch) + for(int i = 0; i < 32/2; i++) + ((UINT64*)d)[i] = m_pCLUT64[s[i]]; + } + else + { + ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == 
PSM_PSMCT16S); + + BYTE* d = ptr + (x-r.left)*2; + for(int j = 0; j < 16; j++, s += 32/2, d += dstpitch) + for(int i = 0; i < 32/2; i++) + ((DWORD*)d)[i] = (DWORD)m_pCLUT64[s[i]]; + } + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + + DWORD* s = block; + + if(TEX0.CPSM == PSM_PSMCT32) + { + BYTE* d = ptr + (x-r.left)*4; + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)d)[i] = m_pCLUT32[(s[i] >> 24)&0x0f]; + } + else + { + ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S); + + BYTE* d = ptr + (x-r.left)*2; + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((WORD*)d)[i] = (WORD)m_pCLUT32[(s[i] >> 24)&0x0f]; + } + } + FOREACH_BLOCK_END +} + +void GSLocalMemory::unSwizzleTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) +{ + FOREACH_BLOCK_START(r, 8, 8, 32) + { + __declspec(align(16)) DWORD block[8*8]; + unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); + + DWORD* s = block; + + if(TEX0.CPSM == PSM_PSMCT32) + { + BYTE* d = ptr + (x-r.left)*4; + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)d)[i] = m_pCLUT32[s[i] >> 28]; + } + else + { + ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S); + + BYTE* d = ptr + (x-r.left)*2; + for(int j = 0; j < 8; j++, s += 8, d += dstpitch) + for(int i = 0; i < 8; i++) + ((WORD*)d)[i] = (WORD)m_pCLUT32[s[i] >> 28]; + } + } + FOREACH_BLOCK_END +} + +/////////////////// + +void GSLocalMemory::ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP) +{ + 
unSwizzleTexture st = m_psm[TEX0.PSM].ustNP; + readTexel rt = m_psm[TEX0.PSM].rtNP; + CSize bs = m_psm[TEX0.PSM].bs; + + if(r.Width() < bs.cx || r.Height() < bs.cy + || (r.left & (bs.cx-1)) || (r.top & (bs.cy-1)) + || (r.right & (bs.cx-1)) || (r.bottom & (bs.cy-1)) + || (CLAMP.WMS == 3) || (CLAMP.WMT == 3)) + { + switch(TEX0.PSM) + { + default: + case PSM_PSMCT32: + case PSM_PSMCT24: + ReadTexture(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st); + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + ReadTexture(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st); + break; + case PSM_PSMT8: + case PSM_PSMT8H: + case PSM_PSMT4: + case PSM_PSMT4HL: + case PSM_PSMT4HH: + switch(TEX0.CPSM) + { + default: + ASSERT(0); + case PSM_PSMCT32: + ReadTexture(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st); + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + ReadTexture(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st); + break; + } + break; + } + } + else + { + (this->*st)(r, dst, dstpitch, TEX0, TEXA); + } +} + +// + +template +void GSLocalMemory::ReadTexture(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP, readTexel rt, unSwizzleTexture st) +{ + // this function is not thread safe! 
+ + DWORD wms = CLAMP.WMS, wmt = CLAMP.WMT; + DWORD minu = CLAMP.MINU, maxu = CLAMP.MAXU; + DWORD minv = CLAMP.MINV, maxv = CLAMP.MAXV; + + if(wms == 2) + { + r.left = min(r.right, max(r.left, (int)minu)); + r.right = max(r.left, min(r.right, (int)maxu)); + } + + if(wmt == 2) + { + r.top = min(r.bottom, max(r.top, (int)minv)); + r.bottom = max(r.top, min(r.bottom, (int)maxv)); + } + + CSize bs = m_psm[TEX0.PSM].bs; + + int bsxm = bs.cx - 1; + int bsym = bs.cy - 1; + + CRect cr((r.left + bsxm) & ~bsxm, (r.top + bsym) & ~bsym, r.right & ~bsxm, r.bottom & ~bsym); + + bool aligned = ((DWORD_PTR)(dst + (cr.left - r.left) * sizeof(T)) & 0xf) == 0; + + if(wms == 3 || wmt == 3) // TODO: do region repeat in pixel shader + { + if(wms == 3 && wmt == 3) + { + int w = minu + 1; + int h = minv + 1; + + w = (w + bsxm) & ~bsxm; + h = (h + bsym) & ~bsym; + + if(w % bs.cx == 0 && maxu % bs.cx == 0 && h % bs.cy == 0 && maxv % bs.cy == 0) + { +//printf("!!! 3 wms = %d, wmt = %d, %3x %3x %3x %3x, %d %d - %d %d\n", wms, wmt, minu, maxu, minv, maxv, r.left, r.top, r.right, r.bottom); + + T* buff = (T*)_aligned_malloc(w * h * sizeof(T), 16); + + (this->*st)(CRect(CPoint(maxu, maxv), CSize(w, h)), (BYTE*)buff, w * sizeof(T), TEX0, TEXA); + + dst -= r.left; + + int k = (r.right - r.left) >> 2; + + for(int y = r.top; y < r.bottom; y++, dst += dstpitch) + { + T* src = &buff[(y & minv) * w]; + + int x = r.left; + + for(int i = 0; i < k; x += 4, i++) + { + ((T*)dst)[x+0] = src[(x+0) & minu]; + ((T*)dst)[x+1] = src[(x+1) & minu]; + ((T*)dst)[x+2] = src[(x+2) & minu]; + ((T*)dst)[x+3] = src[(x+3) & minu]; + } + + for(; x < r.right; x++) + { + ((T*)dst)[x] = src[x & minu]; + } + } + + _aligned_free(buff); + + return; + } + } + + switch(wms) + { + default: for(int x = r.left; x < r.right; x++) m_xtbl[x] = x; break; + case 3: for(int x = r.left; x < r.right; x++) m_xtbl[x] = (x & minu) | maxu; break; + } + + switch(wmt) + { + default: for(int y = r.top; y < r.bottom; y++) m_ytbl[y] = y; break; + 
case 3: for(int y = r.top; y < r.bottom; y++) m_ytbl[y] = (y & minv) | maxv; break; + } + +//printf("1 wms = %d, wmt = %d, %3x %3x %3x %3x, %d %d - %d %d\n", wms, wmt, minu, maxu, minv, maxv, r.left, r.top, r.right, r.bottom); + + for(int y = r.top; y < r.bottom; y++, dst += dstpitch) + for(int x = r.left, i = 0; x < r.right; x++, i++) + ((T*)dst)[i] = (T)(this->*rt)(m_xtbl[x], m_ytbl[y], TEX0, TEXA); + } + else + { + if(aligned) + { + for(int y = r.top; y < cr.top; y++, dst += dstpitch) + for(int x = r.left, i = 0; x < r.right; x++, i++) + ((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA); + + if(!cr.IsRectEmpty()) + { + (this->*st)(cr, dst + (cr.left - r.left)*sizeof(T), dstpitch, TEX0, TEXA); + } + + for(int y = cr.top; y < cr.bottom; y++, dst += dstpitch) + { + for(int x = r.left, i = 0; x < cr.left; x++, i++) + ((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA); + for(int x = cr.right, i = x - r.left; x < r.right; x++, i++) + ((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA); + } + + for(int y = cr.bottom; y < r.bottom; y++, dst += dstpitch) + for(int x = r.left, i = 0; x < r.right; x++, i++) + ((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA); + } + else + { +//printf("2 wms = %d, wmt = %d, %3x %3x %3x %3x, %d %d - %d %d\n", wms, wmt, minu, maxu, minv, maxv, r.left, r.top, r.right, r.bottom); + + for(int y = r.top; y < r.bottom; y++, dst += dstpitch) + for(int x = r.left, i = 0; x < r.right; x++, i++) + ((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA); + } + } +} + +// +/* +HRESULT GSLocalMemory::SaveBMP(ID3D10Device* dev, LPCTSTR fn, DWORD bp, DWORD bw, DWORD psm, int w, int h) +{ + D3D10_TEXTURE2D_DESC desc; + + memset(&desc, 0, sizeof(desc)); + + desc.Width = w; + desc.Height = h; + desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.Usage = D3D10_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + + CComPtr texture; + + HRESULT hr = 
dev->CreateTexture2D(&desc, NULL, &texture); + + D3D10_MAPPED_TEXTURE2D map; + + if(FAILED(hr) || FAILED(texture->Map(0, D3D10_MAP_WRITE, 0, &map))) + { + return E_FAIL; + } + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = bp; + TEX0.TBW = bw; + TEX0.PSM = psm; + + GIFRegTEXA TEXA; + + TEXA.AEM = 0; + TEXA.TA0 = 0; + TEXA.TA1 = 0x80; + + // (this->*m_psm[TEX0.PSM].ust)(CRect(0, 0, w, h), (BYTE*)lr.pBits, lr.Pitch, TEX0, TEXA); + + readTexel rt = m_psm[psm].rt; + + BYTE* p = (BYTE*)map.pData; + + for(int j = 0; j < h; j++, p += map.RowPitch) + for(int i = 0; i < w; i++) + ((DWORD*)p)[i] = (this->*rt)(i, j, TEX0, TEXA); + + texture->Unmap(0); + + return D3DX10SaveTextureToFile(texture, D3DX10_IFF_BMP, fn); +} +*/ \ No newline at end of file diff --git a/gsdx/GSLocalMemory.h b/gsdx/GSLocalMemory.h new file mode 100644 index 0000000..1888c20 --- /dev/null +++ b/gsdx/GSLocalMemory.h @@ -0,0 +1,917 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#pragma warning(disable: 4100) // warning C4100: 'TEXA' : unreferenced formal parameter +#pragma warning(disable: 4244) // warning C4244: '=' : conversion from 'const UINT64' to 'int', possible loss of data (really???) 
+ +#include "GS.h" +#include "GSTables.h" + +class GSLocalMemory +{ +public: + typedef DWORD (*pixelAddress)(int x, int y, DWORD bp, DWORD bw); + typedef void (GSLocalMemory::*writePixel)(int x, int y, DWORD c, DWORD bp, DWORD bw); + typedef void (GSLocalMemory::*writeFrame)(int x, int y, DWORD c, DWORD bp, DWORD bw); + typedef DWORD (GSLocalMemory::*readPixel)(int x, int y, DWORD bp, DWORD bw); + typedef DWORD (GSLocalMemory::*readTexel)(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + typedef void (GSLocalMemory::*writePixelAddr)(DWORD addr, DWORD c); + typedef void (GSLocalMemory::*writeFrameAddr)(DWORD addr, DWORD c); + typedef DWORD (GSLocalMemory::*readPixelAddr)(DWORD addr); + typedef DWORD (GSLocalMemory::*readTexelAddr)(DWORD addr, GIFRegTEXA& TEXA); + typedef void (GSLocalMemory::*SwizzleTexture)(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); + typedef void (GSLocalMemory::*unSwizzleTexture)(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + typedef void (GSLocalMemory::*readTexture)(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP); + + typedef union + { + struct + { + pixelAddress pa, ba, pga; + readPixel rp; + readPixelAddr rpa; + writePixel wp; + writePixelAddr wpa; + readTexel rt, rtP, rtNP; + readTexelAddr rta; + writeFrameAddr wfa; + SwizzleTexture st; + unSwizzleTexture ust, ustP, ustNP; + DWORD bpp, pal, trbpp; + CSize bs, pgs; + int* rowOffset[8]; + }; + BYTE dummy[128]; + } psm_t; + + static psm_t m_psm[64]; + +protected: + static DWORD pageOffset32[32][32][64]; + static DWORD pageOffset32Z[32][32][64]; + static DWORD pageOffset16[32][64][64]; + static DWORD pageOffset16S[32][64][64]; + static DWORD pageOffset16Z[32][64][64]; + static DWORD pageOffset16SZ[32][64][64]; + static DWORD pageOffset8[32][64][128]; + static DWORD pageOffset4[32][128][128]; + + static int rowOffset32[2048]; + static int 
rowOffset32Z[2048]; + static int rowOffset16[2048]; + static int rowOffset16S[2048]; + static int rowOffset16Z[2048]; + static int rowOffset16SZ[2048]; + static int rowOffset8[2][2048]; + static int rowOffset4[2][2048]; + + union {BYTE* m_vm8; WORD* m_vm16; DWORD* m_vm32;}; + + DWORD m_CBP[2]; + WORD* m_pCLUT; + DWORD* m_pCLUT32; + UINT64* m_pCLUT64; + + GIFRegTEX0 m_prevTEX0; + GIFRegTEXCLUT m_prevTEXCLUT; + bool m_fCLUTMayBeDirty; + +public: + GSLocalMemory(); + virtual ~GSLocalMemory(); + + BYTE* GetVM() + { + return m_vm8; + } + + __forceinline static void RoundDown(CSize& s, CSize bs) + { + s.cx &= ~(bs.cx-1); + s.cy &= ~(bs.cy-1); + } + + __forceinline static void RoundUp(CSize& s, CSize bs) + { + s.cx = (s.cx + (bs.cx-1)) & ~(bs.cx-1); + s.cy = (s.cy + (bs.cy-1)) & ~(bs.cy-1); + } + + __forceinline static DWORD Expand24To32(DWORD c, GIFRegTEXA& TEXA) + { + return (((!TEXA.AEM | (c & 0xffffff)) ? TEXA.TA0 : 0) << 24) | (c & 0xffffff); + } + + __forceinline static DWORD Expand16To32(WORD c, GIFRegTEXA& TEXA) + { + return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? 
TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3); + } + + // address + + static DWORD pageAddress32(int x, int y, DWORD bp, DWORD bw) + { + return ((bp >> 5) + (y >> 5) * bw + (x >> 6)) << 11; + } + + static DWORD pageAddress16(int x, int y, DWORD bp, DWORD bw) + { + return ((bp >> 5) + (y >> 6) * bw + (x >> 6)) << 12; + } + + static DWORD pageAddress8(int x, int y, DWORD bp, DWORD bw) + { + return ((bp >> 5) + (y >> 6) * ((bw+1)>>1) + (x >> 7)) << 13; + } + + static DWORD pageAddress4(int x, int y, DWORD bp, DWORD bw) + { + return ((bp >> 5) + (y >> 7) * ((bw+1)>>1) + (x >> 7)) << 14; + } + + static DWORD blockAddress32(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable32[(y >> 3) & 3][(x >> 3) & 7]; + return (page + block) << 6; + } + + static DWORD blockAddress16(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16[(y >> 3) & 7][(x >> 4) & 3]; + return (page + block) << 7; + } + + static DWORD blockAddress16S(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16S[(y >> 3) & 7][(x >> 4) & 3]; + return (page + block) << 7; + } + + static DWORD blockAddress8(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f); + DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7]; + return (page + block) << 8; + } + + static DWORD blockAddress4(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f); + DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3]; + return (page + block) << 9; + } + + static DWORD blockAddress32Z(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable32Z[(y >> 3) & 3][(x >> 3) & 7]; + return (page + block) << 
6; + } + + static DWORD blockAddress16Z(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16Z[(y >> 3) & 7][(x >> 4) & 3]; + return (page + block) << 7; + } + + static DWORD blockAddress16SZ(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3]; + return (page + block) << 7; + } + + static DWORD pixelAddressOrg32(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable32[(y >> 3) & 3][(x >> 3) & 7]; + DWORD word = ((page + block) << 6) + columnTable32[y & 7][x & 7]; + ASSERT(word < 1024*1024); + return word; + } + + static DWORD pixelAddressOrg16(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16[(y >> 3) & 7][(x >> 4) & 3]; + DWORD word = ((page + block) << 7) + columnTable16[y & 7][x & 15]; + ASSERT(word < 1024*1024*2); + return word; + } + + static DWORD pixelAddressOrg16S(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16S[(y >> 3) & 7][(x >> 4) & 3]; + DWORD word = ((page + block) << 7) + columnTable16[y & 7][x & 15]; + ASSERT(word < 1024*1024*2); + return word; + } + + static DWORD pixelAddressOrg8(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f); + DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7]; + DWORD word = ((page + block) << 8) + columnTable8[y & 15][x & 15]; + // ASSERT(word < 1024*1024*4); + return word; + } + + static DWORD pixelAddressOrg4(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f); + DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3]; + DWORD word = ((page + block) << 9) + columnTable4[y & 
15][x & 31]; + ASSERT(word < 1024*1024*8); + return word; + } + + static DWORD pixelAddressOrg32Z(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable32Z[(y >> 3) & 3][(x >> 3) & 7]; + DWORD word = ((page + block) << 6) + ((y & 7) << 3) + (x & 7); + ASSERT(word < 1024*1024); + return word; + } + + static DWORD pixelAddressOrg16Z(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16Z[(y >> 3) & 7][(x >> 4) & 3]; + DWORD word = ((page + block) << 7) + ((y & 7) << 4) + (x & 15); + ASSERT(word < 1024*1024*2); + return word; + } + + static DWORD pixelAddressOrg16SZ(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); + DWORD block = blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3]; + DWORD word = ((page + block) << 7) + ((y & 7) << 4) + (x & 15); + ASSERT(word < 1024*1024*2); + return word; + } + + static DWORD pixelAddress32(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = (bp >> 5) + (y >> 5) * bw + (x >> 6); + DWORD word = (page << 11) + pageOffset32[bp & 0x1f][y & 0x1f][x & 0x3f]; + ASSERT(word < 1024*1024); + return word; + } + + static DWORD pixelAddress16(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = (bp >> 5) + (y >> 6) * bw + (x >> 6); + DWORD word = (page << 12) + pageOffset16[bp & 0x1f][y & 0x3f][x & 0x3f]; + ASSERT(word < 1024*1024*2); + return word; + } + + static DWORD pixelAddress16S(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = (bp >> 5) + (y >> 6) * bw + (x >> 6); + DWORD word = (page << 12) + pageOffset16S[bp & 0x1f][y & 0x3f][x & 0x3f]; + ASSERT(word < 1024*1024*2); + return word; + } + + static DWORD pixelAddress8(int x, int y, DWORD bp, DWORD bw) + { + DWORD page = (bp >> 5) + (y >> 6) * ((bw + 1)>>1) + (x >> 7); + DWORD word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f]; + ASSERT(word < 1024*1024*4); + return word; + } + + 
static DWORD pixelAddress4(int x, int y, DWORD bp, DWORD bw)
+	{
+		// A page spans 128x128 coordinates here; a page row holds (bw + 1) >> 1 pages.
+		const DWORD pagesPerRow = (bw + 1) >> 1;
+		const DWORD pageIndex = (bp >> 5) + (y >> 7) * pagesPerRow + (x >> 7);
+		DWORD word = pageIndex << 14;
+		word += pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f];
+		ASSERT(word < 1024*1024*8);
+		return word;
+	}
+
+	// The three Z variants below follow the same scheme: page index from the upper bits of
+	// bp / y / x, plus an entry of the per-format offset table selected by their low bits.
+
+	static DWORD pixelAddress32Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD pageIndex = (bp >> 5) + (y >> 5) * bw + (x >> 6);
+		DWORD word = pageIndex << 11;
+		word += pageOffset32Z[bp & 0x1f][y & 0x1f][x & 0x3f];
+		ASSERT(word < 1024*1024);
+		return word;
+	}
+
+	static DWORD pixelAddress16Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD pageIndex = (bp >> 5) + (y >> 6) * bw + (x >> 6);
+		DWORD word = pageIndex << 12;
+		word += pageOffset16Z[bp & 0x1f][y & 0x3f][x & 0x3f];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	static DWORD pixelAddress16SZ(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD pageIndex = (bp >> 5) + (y >> 6) * bw + (x >> 6);
+		DWORD word = pageIndex << 12;
+		word += pageOffset16SZ[bp & 0x1f][y & 0x3f][x & 0x3f];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	// pixel R/W
+	//
+	// Address overloads: addr indexes m_vm32, m_vm16 or m_vm8 directly (for the 4-bit
+	// format it counts nibbles, two per m_vm8 element).
+
+	__forceinline DWORD readPixel32(DWORD addr)
+	{
+		const DWORD pixel = m_vm32[addr];
+		return pixel;
+	}
+
+	__forceinline DWORD readPixel24(DWORD addr)
+	{
+		// only the low 24 bits are returned
+		const DWORD low24 = 0x00ffffff;
+		return m_vm32[addr] & low24;
+	}
+
+	__forceinline DWORD readPixel16(DWORD addr)
+	{
+		return static_cast<DWORD>(m_vm16[addr]);
+	}
+
+	__forceinline DWORD readPixel16S(DWORD addr)
+	{
+		return static_cast<DWORD>(m_vm16[addr]);
+	}
+
+	__forceinline DWORD readPixel8(DWORD addr)
+	{
+		return static_cast<DWORD>(m_vm8[addr]);
+	}
+
+	__forceinline DWORD readPixel4(DWORD addr)
+	{
+		// even addr selects the low nibble of the byte, odd addr the high nibble
+		const DWORD shift = (addr & 1) << 2;
+		return (m_vm8[addr >> 1] >> shift) & 0x0f;
+	}
+
+	__forceinline DWORD readPixel8H(DWORD addr)
+	{
+		// bits 24-31 of the 32-bit word
+		const DWORD word = m_vm32[addr];
+		return word >> 24;
+	}
+
+	__forceinline DWORD readPixel4HL(DWORD addr)
+	{
+		// bits 24-27 of the 32-bit word
+		const DWORD topByte = m_vm32[addr] >> 24;
+		return topByte & 0x0f;
+	}
+
+	__forceinline DWORD readPixel4HH(DWORD addr)
+	{
+		// bits 28-31 of the 32-bit word
+		const DWORD topNibble = m_vm32[addr] >> 28;
+		return topNibble & 0x0f;
+	}
+
+	__forceinline DWORD readPixel32Z(DWORD addr)
+	{
+		const DWORD depth = m_vm32[addr];
+		return depth;
+	}
+
+	__forceinline DWORD readPixel24Z(DWORD addr)
+	{
+		// only the low 24 bits are returned
+		const DWORD low24 = 0x00ffffff;
+		return m_vm32[addr] & low24;
+	}
+
+	// 16-bit depth reads: return the m_vm16 element at index addr.
+
+	__forceinline DWORD readPixel16Z(DWORD addr)
+	{
+		return static_cast<DWORD>(m_vm16[addr]);
+	}
+
+	__forceinline DWORD readPixel16SZ(DWORD addr)
+	{
+		return static_cast<DWORD>(m_vm16[addr]);
+	}
+
+	// Coordinate overloads: resolve (x, y, bp, bw) with the pixelAddress* helper used by
+	// the format, then forward to the address overload of the same name.
+
+	__forceinline DWORD readPixel32(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		return readPixel32(addr);
+	}
+
+	__forceinline DWORD readPixel24(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		return readPixel24(addr);
+	}
+
+	__forceinline DWORD readPixel16(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16(x, y, bp, bw);
+		return readPixel16(addr);
+	}
+
+	__forceinline DWORD readPixel16S(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16S(x, y, bp, bw);
+		return readPixel16S(addr);
+	}
+
+	__forceinline DWORD readPixel8(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress8(x, y, bp, bw);
+		return readPixel8(addr);
+	}
+
+	__forceinline DWORD readPixel4(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress4(x, y, bp, bw);
+		return readPixel4(addr);
+	}
+
+	__forceinline DWORD readPixel8H(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		return readPixel8H(addr);
+	}
+
+	__forceinline DWORD readPixel4HL(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		return readPixel4HL(addr);
+	}
+
+	__forceinline DWORD readPixel4HH(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		return readPixel4HH(addr);
+	}
+
+	__forceinline DWORD readPixel32Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32Z(x, y, bp, bw);
+		return readPixel32Z(addr);
+	}
+
+	__forceinline DWORD readPixel24Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32Z(x, y, bp, bw);
+		return readPixel24Z(addr);
+	}
+
+	__forceinline DWORD readPixel16Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16Z(x, y, bp, bw);
+		return readPixel16Z(addr);
+	}
+
+	__forceinline DWORD readPixel16SZ(int x, int y, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16SZ(x, y, bp, bw);
+		return readPixel16SZ(addr);
+	}
+
+	// Address-based writes. Formats that occupy part of a 32-bit word or of a byte
+	// keep the bits they do not own and replace only their own field.
+
+	__forceinline void writePixel32(DWORD addr, DWORD c)
+	{
+		m_vm32[addr] = c;
+	}
+
+	__forceinline void writePixel24(DWORD addr, DWORD c)
+	{
+		// bits 24-31 are preserved
+		const DWORD kept = m_vm32[addr] & 0xff000000;
+		m_vm32[addr] = kept | (c & 0x00ffffff);
+	}
+
+	__forceinline void writePixel16(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = static_cast<WORD>(c);
+	}
+
+	__forceinline void writePixel16S(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = static_cast<WORD>(c);
+	}
+
+	__forceinline void writePixel8(DWORD addr, DWORD c)
+	{
+		m_vm8[addr] = static_cast<BYTE>(c);
+	}
+
+	__forceinline void writePixel4(DWORD addr, DWORD c)
+	{
+		// even addr owns the low nibble of the byte, odd addr the high nibble;
+		// the mask 0xf0 >> shift keeps the other nibble
+		const int shift = (addr & 1) << 2;
+		const DWORD byteIndex = addr >> 1;
+		const int kept = m_vm8[byteIndex] & (0xf0 >> shift);
+		m_vm8[byteIndex] = (BYTE)(kept | ((c & 0x0f) << shift));
+	}
+
+	__forceinline void writePixel8H(DWORD addr, DWORD c)
+	{
+		// replaces bits 24-31
+		const DWORD kept = m_vm32[addr] & 0x00ffffff;
+		m_vm32[addr] = kept | (c << 24);
+	}
+
+	__forceinline void writePixel4HL(DWORD addr, DWORD c)
+	{
+		// replaces bits 24-27
+		const DWORD kept = m_vm32[addr] & 0xf0ffffff;
+		m_vm32[addr] = kept | ((c & 0x0f) << 24);
+	}
+
+	__forceinline void writePixel4HH(DWORD addr, DWORD c)
+	{
+		// replaces bits 28-31
+		const DWORD kept = m_vm32[addr] & 0x0fffffff;
+		m_vm32[addr] = kept | ((c & 0x0f) << 28);
+	}
+
+	__forceinline void writePixel32Z(DWORD addr, DWORD c)
+	{
+		m_vm32[addr] = c;
+	}
+
+	__forceinline void writePixel24Z(DWORD addr, DWORD c)
+	{
+		// bits 24-31 are preserved
+		const DWORD kept = m_vm32[addr] & 0xff000000;
+		m_vm32[addr] = kept | (c & 0x00ffffff);
+	}
+
+	__forceinline void writePixel16Z(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = static_cast<WORD>(c);
+	}
+
+	__forceinline void writePixel16SZ(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = static_cast<WORD>(c);
+	}
+
+	// writeFrame16*: pack a 32-bit value into 16 bits before storing it.
+	// Bit 31 -> bit 15, bits 19-23 -> bits 10-14, bits 11-15 -> bits 5-9, bits 3-7 -> bits 0-4.
+
+	__forceinline void writeFrame16(DWORD addr, DWORD c)
+	{
+		const DWORD bit15 = (c >> 16) & 0x8000;
+		const DWORD bits10to14 = (c >> 9) & 0x7c00;
+		const DWORD bits5to9 = (c >> 6) & 0x03e0;
+		const DWORD bits0to4 = (c >> 3) & 0x001f;
+		writePixel16(addr, bit15 | bits10to14 | bits5to9 | bits0to4);
+	}
+
+	__forceinline void writeFrame16S(DWORD addr, DWORD c)
+	{
+		const DWORD bit15 = (c >> 16) & 0x8000;
+		const DWORD bits10to14 = (c >> 9) & 0x7c00;
+		const DWORD bits5to9 = (c >> 6) & 0x03e0;
+		const DWORD bits0to4 = (c >> 3) & 0x001f;
+		writePixel16S(addr, bit15 | bits10to14 | bits5to9 | bits0to4);
+	}
+
+	// Coordinate-based writes: resolve the address, then use the address overload.
+
+	__forceinline void writePixel32(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		writePixel32(addr, c);
+	}
+
+	__forceinline void writePixel24(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		writePixel24(addr, c);
+	}
+
+	__forceinline void writePixel16(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16(x, y, bp, bw);
+		writePixel16(addr, c);
+	}
+
+	__forceinline void writePixel16S(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16S(x, y, bp, bw);
+		writePixel16S(addr, c);
+	}
+
+	__forceinline void writePixel8(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress8(x, y, bp, bw);
+		writePixel8(addr, c);
+	}
+
+	__forceinline void writePixel4(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress4(x, y, bp, bw);
+		writePixel4(addr, c);
+	}
+
+	__forceinline void writePixel8H(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		writePixel8H(addr, c);
+	}
+
+	__forceinline void writePixel4HL(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		writePixel4HL(addr, c);
+	}
+
+	__forceinline void writePixel4HH(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32(x, y, bp, bw);
+		writePixel4HH(addr, c);
+	}
+
+	__forceinline void writePixel32Z(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32Z(x, y, bp, bw);
+		writePixel32Z(addr, c);
+	}
+
+	__forceinline void writePixel24Z(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress32Z(x, y, bp, bw);
+		writePixel24Z(addr, c);
+	}
+
+	__forceinline void writePixel16Z(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16Z(x, y, bp, bw);
+		writePixel16Z(addr, c);
+	}
+
+	__forceinline void writePixel16SZ(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16SZ(x, y, bp, bw);
+		writePixel16SZ(addr, c);
+	}
+
+	__forceinline void writeFrame16(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16(x, y, bp, bw);
+		writeFrame16(addr, c);
+	}
+
+	__forceinline void writeFrame16S(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		const DWORD addr = pixelAddress16S(x, y, bp, bw);
+		writeFrame16S(addr, c);
+	}
+
+	// Address-based texel reads. 32-bit returns the stored word, 24/16-bit go through
+	// Expand24To32 / Expand16To32 with TEXA, and the indexed formats look the pixel
+	// value up in m_pCLUT32 (TEXA is accepted but not used by those).
+
+	__forceinline DWORD readTexel32(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD texel = m_vm32[addr];
+		return texel;
+	}
+
+	__forceinline DWORD readTexel24(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = m_vm32[addr];
+		return Expand24To32(raw, TEXA);
+	}
+
+	__forceinline DWORD readTexel16(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		return Expand16To32(m_vm16[addr], TEXA);
+	}
+
+	__forceinline DWORD readTexel16S(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		return Expand16To32(m_vm16[addr], TEXA);
+	}
+
+	__forceinline DWORD readTexel8(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD index = readPixel8(addr);
+		return m_pCLUT32[index];
+	}
+
+	__forceinline DWORD readTexel4(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD index = readPixel4(addr);
+		return m_pCLUT32[index];
+	}
+
+	__forceinline DWORD readTexel8H(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD index = readPixel8H(addr);
+		return m_pCLUT32[index];
+	}
+
+	__forceinline DWORD readTexel4HL(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD index = readPixel4HL(addr);
+		return m_pCLUT32[index];
+	}
+
+	__forceinline DWORD readTexel4HH(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		const DWORD index = readPixel4HH(addr);
+		return m_pCLUT32[index];
+	}
+
+	// Coordinate-based texel reads: the address comes from TEX0.TBP0 / TEX0.TBW.
+
+	__forceinline DWORD readTexel32(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel32(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel24(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel24(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel16(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress16(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel16(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel16S(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress16S(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel16S(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel8(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress8(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel8(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel4(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress4(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel4(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel8H(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel8H(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel4HL(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel4HL(addr, TEXA);
+	}
+
+	__forceinline DWORD readTexel4HH(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD addr = pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW);
+		return readTexel4HH(addr, TEXA);
+	}
+
+	// "P" variants return the stored pixel value itself (no expansion, no CLUT lookup)
+	// by calling the coordinate overload of readPixel* for the format.
+
+	__forceinline DWORD readTexel16P(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel16(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	__forceinline DWORD readTexel16SP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel16S(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	__forceinline DWORD readTexel8P(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel8(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	__forceinline DWORD readTexel8HP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel8H(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	__forceinline DWORD readTexel4P(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel4(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	__forceinline DWORD readTexel4HLP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel4HL(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	__forceinline DWORD readTexel4HHP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		const DWORD raw = readPixel4HH(x, y, TEX0.TBP0, TEX0.TBW);
+		return raw;
+	}
+
+	//
+
+	// Selects the address-based pixel read by PSM. An unrecognised PSM asserts and is
+	// then read as 32-bit.
+	__forceinline DWORD readPixelX(int PSM, DWORD addr)
+	{
+		DWORD pixel;
+
+		switch(PSM)
+		{
+		case PSM_PSMCT32: pixel = readPixel32(addr); break;
+		case PSM_PSMCT24: pixel = readPixel24(addr); break;
+		case PSM_PSMCT16: pixel = readPixel16(addr); break;
+		case PSM_PSMCT16S: pixel = readPixel16S(addr); break;
+		case PSM_PSMT8: pixel = readPixel8(addr); break;
+		case PSM_PSMT4: pixel = readPixel4(addr); break;
+		case PSM_PSMT8H: pixel = readPixel8H(addr); break;
+		case PSM_PSMT4HL: pixel = readPixel4HL(addr); break;
+		case PSM_PSMT4HH: pixel = readPixel4HH(addr); break;
+		case PSM_PSMZ32: pixel = readPixel32Z(addr); break;
+		case PSM_PSMZ24: pixel = readPixel24Z(addr); break;
+		case PSM_PSMZ16: pixel = readPixel16Z(addr); break;
+		case PSM_PSMZ16S: pixel = readPixel16SZ(addr); break;
+		default: ASSERT(0); pixel = readPixel32(addr); break;
+		}
+
+		return pixel;
+	}
+
+
__forceinline DWORD readTexelX(int PSM, DWORD addr, GIFRegTEXA& TEXA)
+	{
+		// Selects the address-based texel read by PSM; an unrecognised PSM asserts
+		// and is then read as 32-bit.
+		DWORD texel;
+
+		switch(PSM)
+		{
+		case PSM_PSMCT32: texel = readTexel32(addr, TEXA); break;
+		case PSM_PSMCT24: texel = readTexel24(addr, TEXA); break;
+		case PSM_PSMCT16: texel = readTexel16(addr, TEXA); break;
+		case PSM_PSMCT16S: texel = readTexel16S(addr, TEXA); break;
+		case PSM_PSMT8: texel = readTexel8(addr, TEXA); break;
+		case PSM_PSMT4: texel = readTexel4(addr, TEXA); break;
+		case PSM_PSMT8H: texel = readTexel8H(addr, TEXA); break;
+		case PSM_PSMT4HL: texel = readTexel4HL(addr, TEXA); break;
+		case PSM_PSMT4HH: texel = readTexel4HH(addr, TEXA); break;
+		default: ASSERT(0); texel = readTexel32(addr, TEXA); break;
+		}
+
+		return texel;
+	}
+
+	// Coordinate-based counterpart of the overload above.
+	__forceinline DWORD readTexelX(int PSM, int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		DWORD texel;
+
+		switch(PSM)
+		{
+		case PSM_PSMCT32: texel = readTexel32(x, y, TEX0, TEXA); break;
+		case PSM_PSMCT24: texel = readTexel24(x, y, TEX0, TEXA); break;
+		case PSM_PSMCT16: texel = readTexel16(x, y, TEX0, TEXA); break;
+		case PSM_PSMCT16S: texel = readTexel16S(x, y, TEX0, TEXA); break;
+		case PSM_PSMT8: texel = readTexel8(x, y, TEX0, TEXA); break;
+		case PSM_PSMT4: texel = readTexel4(x, y, TEX0, TEXA); break;
+		case PSM_PSMT8H: texel = readTexel8H(x, y, TEX0, TEXA); break;
+		case PSM_PSMT4HL: texel = readTexel4HL(x, y, TEX0, TEXA); break;
+		case PSM_PSMT4HH: texel = readTexel4HH(x, y, TEX0, TEXA); break;
+		default: ASSERT(0); texel = readTexel32(x, y, TEX0, TEXA); break;
+		}
+
+		return texel;
+	}
+
+	// Selects the address-based pixel write by PSM; an unrecognised PSM asserts and
+	// is then written as 32-bit.
+	__forceinline void writePixelX(int PSM, DWORD addr, DWORD c)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: writePixel32(addr, c); return;
+		case PSM_PSMCT24: writePixel24(addr, c); return;
+		case PSM_PSMCT16: writePixel16(addr, c); return;
+		case PSM_PSMCT16S: writePixel16S(addr, c); return;
+		case PSM_PSMT8: writePixel8(addr, c); return;
+		case PSM_PSMT4: writePixel4(addr, c); return;
+		case PSM_PSMT8H: writePixel8H(addr, c); return;
+		case PSM_PSMT4HL: writePixel4HL(addr, c); return;
+		case PSM_PSMT4HH: writePixel4HH(addr, c); return;
+		case PSM_PSMZ32: writePixel32Z(addr, c); return;
+		case PSM_PSMZ24: writePixel24Z(addr, c); return;
+		case PSM_PSMZ16: writePixel16Z(addr, c); return;
+		case PSM_PSMZ16S: writePixel16SZ(addr, c); return;
+		default: ASSERT(0); writePixel32(addr, c); return;
+		}
+	}
+
+	// Frame-buffer write by PSM. The 16-bit formats go through writeFrame16 /
+	// writeFrame16S, which pack the 32-bit value first; only the four colour formats
+	// are handled, anything else asserts and is written as 32-bit.
+	__forceinline void writeFrameX(int PSM, DWORD addr, DWORD c)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: writePixel32(addr, c); return;
+		case PSM_PSMCT24: writePixel24(addr, c); return;
+		case PSM_PSMCT16: writeFrame16(addr, c); return;
+		case PSM_PSMCT16S: writeFrame16S(addr, c); return;
+		default: ASSERT(0); writePixel32(addr, c); return;
+		}
+	}
+
+	// FillRect
+
+	bool FillRect(const CRect& r, DWORD c, DWORD psm, DWORD fbp, DWORD fbw);
+
+	// CLUT
+
+	void InvalidateCLUT() {m_fCLUTMayBeDirty = true;}
+	bool IsCLUTDirty(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT);
+	bool WriteCLUT(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT);
+
+	void ReadCLUT(GIFRegTEX0 TEX0, DWORD* pCLUT32);
+	void SetupCLUT(GIFRegTEX0 TEX0);
+
+	// expands 16->32
+
+	void ReadCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, DWORD* pCLUT32);
+	void SetupCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA);
+	void CopyCLUT32(DWORD* pCLUT32, int nPaletteEntries);
+
+	//
+
+	void SwizzleTexture32(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture24(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture16(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture16S(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture8(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture8H(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture4(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTextureX(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+
+	void unSwizzleTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+
+	void ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
+
+	// 32/16/8P
+
+	void unSwizzleTexture16P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture16SP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8HP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+ void unSwizzleTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + void unSwizzleTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + + void ReadTextureP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP); + + // 32/16 + + void unSwizzleTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + void unSwizzleTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + void unSwizzleTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + void unSwizzleTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + void unSwizzleTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA); + + void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP); + + // + + static DWORD m_xtbl[1024], m_ytbl[1024]; + + template void ReadTexture(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP, readTexel rt, unSwizzleTexture st); + + // HRESULT SaveBMP(ID3D10Device* dev, LPCTSTR fn, DWORD bp, DWORD bw, DWORD psm, int w, int h); +}; + +#pragma warning(default: 4244) \ No newline at end of file diff --git a/gsdx/GSPerfMon.cpp b/gsdx/GSPerfMon.cpp new file mode 100644 index 0000000..9e1c549 --- /dev/null +++ b/gsdx/GSPerfMon.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSPerfMon.h" + +extern "C" unsigned __int64 __rdtsc(); + +GSPerfMon::GSPerfMon() + : m_total(0) + , m_begin(0) + , m_frame(0) + , m_lastframe(0) + , m_count(0) +{ + memset(m_counters, 0, sizeof(m_counters)); + memset(m_stats, 0, sizeof(m_stats)); + memset(m_warnings, 0, sizeof(m_warnings)); +} + +void GSPerfMon::Put(counter_t c, double val) +{ + if(c == Frame) + { + clock_t now = clock(); + + if(m_lastframe != 0) + { + m_counters[c] += now - m_lastframe; + } + + m_lastframe = now; + m_frame++; + m_count++; + } + else + { + m_counters[c] += val; + } +} + +void GSPerfMon::Update() +{ + if(m_count > 0) + { + for(int i = 0; i < countof(m_counters); i++) + { + m_stats[i] = m_counters[i] / m_count; + } + + m_count = 0; + } + + memset(m_counters, 0, sizeof(m_counters)); +} + +void GSPerfMon::Start() +{ + m_start = __rdtsc(); + + if(m_begin == 0) + { + m_begin = m_start; + } +} + +void GSPerfMon::Stop() +{ + if(m_start > 0) + { + m_total += __rdtsc() - m_start; + m_start = 0; + } +} + +int GSPerfMon::CPU() +{ + int percent = (int)(100 * m_total / (__rdtsc() - m_begin)); + + m_begin = 0; + m_start = 0; + m_total = 0; + + return percent; +} \ No newline at end of file diff --git a/gsdx/GSPerfMon.h b/gsdx/GSPerfMon.h new file mode 100644 index 0000000..07cc15c --- /dev/null +++ b/gsdx/GSPerfMon.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it 
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "x86.h" + +class GSPerfMon +{ +public: + enum counter_t {Frame, Prim, Draw, Swizzle, Unswizzle, Unswizzle2, Texture, ConvertRT2T, ReadRT, WriteRT, WriteTexture, CounterLast}; + enum warning_t {DATE, PABE, ABE, COLCLAMP, DepthTexture, WarningLast}; + +protected: + double m_counters[CounterLast]; + double m_stats[CounterLast]; + bool m_warnings[WarningLast]; + UINT64 m_begin, m_total, m_start, m_frame; + clock_t m_lastframe; + int m_count; + + void Start(); + void Stop(); + + friend class GSPerfMonAutoTimer; + +public: + GSPerfMon(); + + void SetFrame(UINT64 frame) {m_frame = frame;} + UINT64 GetFrame() {return m_frame;} + void Put(counter_t c, double val = 0); + double Get(counter_t c) {return m_stats[c];} + void Put(warning_t c) {m_warnings[c] = true;} + bool Get(warning_t c) {bool b = m_warnings[c]; m_warnings[c] = false; return b;} + void Update(); + int CPU(); +}; + +class GSPerfMonAutoTimer +{ + GSPerfMon* m_pm; + +public: + GSPerfMonAutoTimer(GSPerfMon& pm) {(m_pm = &pm)->Start();} + ~GSPerfMonAutoTimer() {m_pm->Stop();} +}; diff --git a/gsdx/GSScale.h b/gsdx/GSScale.h new file mode 100644 index 0000000..efe849b --- /dev/null +++ b/gsdx/GSScale.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is 
free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include + +struct GSScale +{ + float x, y; + struct GSScale() {x = y = 1;} + struct GSScale(float x, float y) {this->x = x; this->y = y;} + bool operator == (const struct GSScale& s) {return fabs(x - s.x) < 0.001 && fabs(y - s.y) < 0.001;} +}; + diff --git a/gsdx/GSState.cpp b/gsdx/GSState.cpp new file mode 100644 index 0000000..88bdc08 --- /dev/null +++ b/gsdx/GSState.cpp @@ -0,0 +1,1314 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSState.h" + +GSState::GSState(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : m_mt(mt) + , m_irq(irq) + , m_nloophack(nloophack) + , m_path3hack(0) + , m_q(1.0f) + , m_version(4) + , m_vmsize(4 * 1024 * 1024) +{ + m_sssize = sizeof(m_version) + sizeof(m_env) + sizeof(m_v) + sizeof(m_x) + sizeof(m_y) + m_vmsize + sizeof(m_path) + sizeof(m_q); + + ASSERT(base); + + PMODE = (GSRegPMODE*)(base + GS_PMODE); + SMODE1 = (GSRegSMODE1*)(base + GS_SMODE1); + SMODE2 = (GSRegSMODE2*)(base + GS_SMODE2); + // SRFSH = (GSRegPMODE*)(base + GS_SRFSH); + // SYNCH1 = (GSRegPMODE*)(base + GS_SYNCH1); + // SYNCH2 = (GSRegPMODE*)(base + GS_SYNCH2); + // SYNCV = (GSRegPMODE*)(base + GS_SYNCV); + DISPFB[0] = (GSRegDISPFB*)(base + GS_DISPFB1); + DISPFB[1] = (GSRegDISPFB*)(base + GS_DISPFB2); + DISPLAY[0] = (GSRegDISPLAY*)(base + GS_DISPLAY1); + DISPLAY[1] = (GSRegDISPLAY*)(base + GS_DISPLAY2); + EXTBUF = (GSRegEXTBUF*)(base + GS_EXTBUF); + EXTDATA = (GSRegEXTDATA*)(base + GS_EXTDATA); + EXTWRITE = (GSRegEXTWRITE*)(base + GS_EXTWRITE); + BGCOLOR = (GSRegBGCOLOR*)(base + GS_BGCOLOR); + CSR = (GSRegCSR*)(base + GS_CSR); + IMR = (GSRegIMR*)(base + GS_IMR); + BUSDIR = (GSRegBUSDIR*)(base + GS_BUSDIR); + SIGLBLID = (GSRegSIGLBLID*)(base + GS_SIGLBLID); + + PRIM = &m_env.PRIM; +// CSR->rREV = 0x20; + m_env.PRMODECONT.AC = 1; + + m_x = m_y = 0; + m_bytes = 0; + m_maxbytes = 1024 * 1024 * 4; + m_buff = (BYTE*)_aligned_malloc(m_maxbytes, 16); + + Reset(); + + ResetHandlers(); +} + +GSState::~GSState() +{ + _aligned_free(m_buff); +} + +void GSState::Reset() +{ + memset(&m_env, 0, sizeof(m_env)); + memset(m_path, 0, sizeof(m_path)); + memset(&m_v, 0, sizeof(m_v)); + +// PRIM = &m_env.PRIM; +// m_env.PRMODECONT.AC = 1; + + m_context = &m_env.CTXT[0]; + + m_env.CTXT[0].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[0].FRAME.PSM]; + m_env.CTXT[0].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[0].ZBUF.PSM]; + m_env.CTXT[0].ttbl 
= &GSLocalMemory::m_psm[m_env.CTXT[0].TEX0.PSM]; + + m_env.CTXT[1].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[1].FRAME.PSM]; + m_env.CTXT[1].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[1].ZBUF.PSM]; + m_env.CTXT[1].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[1].TEX0.PSM]; +} + +void GSState::ResetHandlers() +{ + for(int i = 0; i < countof(m_fpGIFPackedRegHandlers); i++) + { + m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull; + } + + m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = &GSState::GIFPackedRegHandlerPRIM; + m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA; + m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ; + m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV; + m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2; + m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = &GSState::GIFPackedRegHandlerTEX0<0>; + m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = &GSState::GIFPackedRegHandlerTEX0<1>; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerCLAMP<0>; + m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerCLAMP<1>; + m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; + m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF3; + m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ3; + m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D; + m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP; + + for(int i = 0; i < countof(m_fpGIFRegHandlers); i++) + { + m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull; + } + + m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; + m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; + m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; + m_fpGIFRegHandlers[GIF_A_D_REG_UV] = 
&GSState::GIFRegHandlerUV; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3; + m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3; + m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; + m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; + m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT; + m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA; + m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL; + 
m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH; + m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX; + m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE; + m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP; + m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE; + m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>; + m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>; + m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF; + m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS; + m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG; + m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR; + m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG; + m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL; + m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH; + m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL; +} + +CPoint GSState::GetDisplayPos(int i) +{ + ASSERT(i >= 0 && i < 2); + + CPoint p; + + p.x = DISPLAY[i]->DX / 
(DISPLAY[i]->MAGH + 1); + p.y = DISPLAY[i]->DY / (DISPLAY[i]->MAGV + 1); + + return p; +} + +CSize GSState::GetDisplaySize(int i) +{ + ASSERT(i >= 0 && i < 2); + + CSize s; + + s.cx = (DISPLAY[i]->DW + 1) / (DISPLAY[i]->MAGH + 1); + s.cy = (DISPLAY[i]->DH + 1) / (DISPLAY[i]->MAGV + 1); + + if(s.cy & 1) s.cy++; + + return s; +} + +CRect GSState::GetDisplayRect(int i) +{ + return CRect(GetDisplayPos(i), GetDisplaySize(i)); +} + +CSize GSState::GetDisplayPos() +{ + return GetDisplayPos(IsEnabled(1) ? 1 : 0); +} + +CSize GSState::GetDisplaySize() +{ + return GetDisplaySize(IsEnabled(1) ? 1 : 0); +} + +CRect GSState::GetDisplayRect() +{ + return GetDisplayRect(IsEnabled(1) ? 1 : 0); +} + +CPoint GSState::GetFramePos(int i) +{ + ASSERT(i >= 0 && i < 2); + + return CPoint(DISPFB[i]->DBX, DISPFB[i]->DBY); +} + +CSize GSState::GetFrameSize(int i) +{ + CSize s = GetDisplaySize(i); + + if(SMODE2->INT && SMODE2->FFMD && s.cy > 1) s.cy >>= 1; + + return s; +} + +CRect GSState::GetFrameRect(int i) +{ + return CRect(GetFramePos(i), GetFrameSize(i)); +} + +CSize GSState::GetFramePos() +{ + return GetFramePos(IsEnabled(1) ? 1 : 0); +} + +CSize GSState::GetFrameSize() +{ + return GetFrameSize(IsEnabled(1) ? 1 : 0); +} + +CRect GSState::GetFrameRect() +{ + return GetFrameRect(IsEnabled(1) ? 1 : 0); +} + +bool GSState::IsEnabled(int i) +{ + ASSERT(i >= 0 && i < 2); + + if(i == 0 && PMODE->EN1) + { + return DISPLAY[0]->DW || DISPLAY[0]->DH; + } + else if(i == 1 && PMODE->EN2) + { + return DISPLAY[1]->DW || DISPLAY[1]->DH; + } + + return false; +} + +int GSState::GetFPS() +{ + return ((SMODE1->CMOD & 1) ? 50 : 60) / (SMODE2->INT ? 
1 : 2); +} + +// + +static __m128i _000000ff = _mm_set1_epi32(0x000000ff); +static __m128i _00003fff = _mm_set1_epi32(0x00003fff); + +// GIFPackedRegHandler* + +void GSState::GIFPackedRegHandlerNull(GIFPackedReg* r) +{ + // ASSERT(0); +} + +void GSState::GIFPackedRegHandlerPRIM(GIFPackedReg* r) +{ + ASSERT(r->PRIM.PRIM < 7); + + GIFReg r2; + r2.PRIM.i64 = r->PRIM.PRIM; + GIFRegHandlerPRIM(&r2); +} + +void GSState::GIFPackedRegHandlerRGBA(GIFPackedReg* r) +{ +#if defined(_M_AMD64) || _M_IX86_FP >= 2 + + __m128i r0 = _mm_loadu_si128((__m128i*)r); + r0 = _mm_and_si128(r0, _000000ff); + r0 = _mm_packs_epi32(r0, r0); + r0 = _mm_packus_epi16(r0, r0); + m_v.RGBAQ.ai32[0] = _mm_cvtsi128_si32(r0); + +#else + + m_v.RGBAQ.R = r->RGBA.R; + m_v.RGBAQ.G = r->RGBA.G; + m_v.RGBAQ.B = r->RGBA.B; + m_v.RGBAQ.A = r->RGBA.A; + +#endif + + m_v.RGBAQ.Q = m_q; +} + +void GSState::GIFPackedRegHandlerSTQ(GIFPackedReg* r) +{ +#if defined(_M_AMD64) + + m_v.ST.i64 = r->ai64[0]; + +#elif _M_IX86_FP >= 2 + + _mm_storel_epi64((__m128i*)&m_v.ST.i64, _mm_loadl_epi64((__m128i*)r)); + +#else + + m_v.ST.S = r->STQ.S; + m_v.ST.T = r->STQ.T; + +#endif + + m_q = r->STQ.Q; +} + +void GSState::GIFPackedRegHandlerUV(GIFPackedReg* r) +{ +#if defined(_M_AMD64) || _M_IX86_FP >= 2 + + __m128i r0 = _mm_loadu_si128((__m128i*)r); + r0 = _mm_and_si128(r0, _00003fff); + r0 = _mm_packs_epi32(r0, r0); + m_v.UV.ai32[0] = _mm_cvtsi128_si32(r0); + +#else + + m_v.UV.U = r->UV.U; + m_v.UV.V = r->UV.V; + +#endif +} + +void GSState::GIFPackedRegHandlerXYZF2(GIFPackedReg* r) +{ + m_v.XYZ.X = r->XYZF2.X; + m_v.XYZ.Y = r->XYZF2.Y; + m_v.XYZ.Z = r->XYZF2.Z; + m_v.FOG.F = r->XYZF2.F; + + VertexKick(r->XYZF2.ADC); +} + +void GSState::GIFPackedRegHandlerXYZ2(GIFPackedReg* r) +{ + m_v.XYZ.X = r->XYZ2.X; + m_v.XYZ.Y = r->XYZ2.Y; + m_v.XYZ.Z = r->XYZ2.Z; + + VertexKick(r->XYZ2.ADC); +} + +template void GSState::GIFPackedRegHandlerTEX0(GIFPackedReg* r) +{ + GIFRegHandlerTEX0((GIFReg*)&r->ai64[0]); +} + +template void 
GSState::GIFPackedRegHandlerCLAMP(GIFPackedReg* r) +{ + GIFRegHandlerCLAMP((GIFReg*)&r->ai64[0]); +} + +void GSState::GIFPackedRegHandlerFOG(GIFPackedReg* r) +{ + m_v.FOG.F = r->FOG.F; +} + +void GSState::GIFPackedRegHandlerXYZF3(GIFPackedReg* r) +{ + GIFRegHandlerXYZF3((GIFReg*)&r->ai64[0]); +} + +void GSState::GIFPackedRegHandlerXYZ3(GIFPackedReg* r) +{ + GIFRegHandlerXYZ3((GIFReg*)&r->ai64[0]); +} + +void GSState::GIFPackedRegHandlerA_D(GIFPackedReg* r) +{ + (this->*m_fpGIFRegHandlers[(BYTE)r->A_D.ADDR])(&r->r); +} + +void GSState::GIFPackedRegHandlerNOP(GIFPackedReg* r) +{ +} + +// GIFRegHandler* + +void GSState::GIFRegHandlerNull(GIFReg* r) +{ + // ASSERT(0); +} + +void GSState::GIFRegHandlerPRIM(GIFReg* r) +{ + // ASSERT(r->PRIM.PRIM < 7); + + if(m_env.PRIM.i64 != r->PRIM.i64) + { + Flush(); + } + + m_env.PRIM = r->PRIM; + m_env.PRMODE._PRIM = r->PRIM.PRIM; + + if(m_env.PRMODECONT.AC) + { + m_context = &m_env.CTXT[m_env.PRIM.CTXT]; + } + + ResetPrim(); +} + +void GSState::GIFRegHandlerRGBAQ(GIFReg* r) +{ + m_v.RGBAQ = r->RGBAQ; +} + +void GSState::GIFRegHandlerST(GIFReg* r) +{ + m_v.ST = r->ST; +} + +void GSState::GIFRegHandlerUV(GIFReg* r) +{ + m_v.UV = r->UV; +} + +void GSState::GIFRegHandlerXYZF2(GIFReg* r) +{ +/* + m_v.XYZ.X = r->XYZF.X; + m_v.XYZ.Y = r->XYZF.Y; + m_v.XYZ.Z = r->XYZF.Z; + m_v.FOG.F = r->XYZF.F; +*/ + m_v.XYZ.ai32[0] = r->XYZF.ai32[0]; + m_v.XYZ.ai32[1] = r->XYZF.ai32[1] & 0x00ffffff; + m_v.FOG.ai32[1] = r->XYZF.ai32[1] & 0xff000000; + + VertexKick(false); +} + +void GSState::GIFRegHandlerXYZ2(GIFReg* r) +{ + m_v.XYZ = r->XYZ; + + VertexKick(false); +} + +template void GSState::GIFRegHandlerTEX0(GIFReg* r) +{ + // even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing + + if(PRIM->CTXT == i && m_env.CTXT[i].TEX0.i64 != r->TEX0.i64 + || r->TEX0.CLD >= 1 && r->TEX0.CLD <= 3 && m_mem.IsCLUTDirty(r->TEX0, m_env.TEXCLUT)) + { + Flush(); + } + + m_env.CTXT[i].TEX0 = r->TEX0; + + // 
ASSERT(m_env.CTXT[i].TEX0.TW <= 10 && m_env.CTXT[i].TEX0.TH <= 10 && (m_env.CTXT[i].TEX0.CPSM & ~0xa) == 0); + + if(m_env.CTXT[i].TEX0.TW > 10) m_env.CTXT[i].TEX0.TW = 10; + if(m_env.CTXT[i].TEX0.TH > 10) m_env.CTXT[i].TEX0.TH = 10; + + m_env.CTXT[i].TEX0.CPSM &= 0xa; // 1010b + + m_env.CTXT[i].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[i].TEX0.PSM]; + + FlushWrite(); + + m_mem.WriteCLUT(r->TEX0, m_env.TEXCLUT); +} + +template void GSState::GIFRegHandlerCLAMP(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].CLAMP.i64 != r->CLAMP.i64) + { + Flush(); + } + + m_env.CTXT[i].CLAMP = r->CLAMP; +} + +void GSState::GIFRegHandlerFOG(GIFReg* r) +{ + m_v.FOG = r->FOG; +} + +void GSState::GIFRegHandlerXYZF3(GIFReg* r) +{ +/* + m_v.XYZ.X = r->XYZF.X; + m_v.XYZ.Y = r->XYZF.Y; + m_v.XYZ.Z = r->XYZF.Z; + m_v.FOG.F = r->XYZF.F; +*/ + m_v.XYZ.ai32[0] = r->XYZF.ai32[0]; + m_v.XYZ.ai32[1] = r->XYZF.ai32[1] & 0x00ffffff; + m_v.FOG.ai32[1] = r->XYZF.ai32[1] & 0xff000000; + + VertexKick(true); +} + +void GSState::GIFRegHandlerXYZ3(GIFReg* r) +{ + m_v.XYZ = r->XYZ; + + VertexKick(true); +} + +void GSState::GIFRegHandlerNOP(GIFReg* r) +{ +} + +template void GSState::GIFRegHandlerTEX1(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].TEX1.i64 != r->TEX1.i64) + { + Flush(); + } + + m_env.CTXT[i].TEX1 = r->TEX1; +} + +template void GSState::GIFRegHandlerTEX2(GIFReg* r) +{ + // m_env.CTXT[i].TEX2 = r->TEX2; // not used + + UINT64 mask = 0xFFFFFFE003F00000ui64; // TEX2 bits + + r->i64 = (r->i64 & mask) | (m_env.CTXT[i].TEX0.i64 & ~mask); + + GIFRegHandlerTEX0(r); +} + +template void GSState::GIFRegHandlerXYOFFSET(GIFReg* r) +{ + if(m_env.CTXT[i].XYOFFSET.i64 != r->XYOFFSET.i64) + { + Flush(); + } + + m_env.CTXT[i].XYOFFSET = r->XYOFFSET; + + m_env.CTXT[i].UpdateScissor(); +} + +void GSState::GIFRegHandlerPRMODECONT(GIFReg* r) +{ + if(m_env.PRMODECONT.i64 != r->PRMODECONT.i64) + { + Flush(); + } + + m_env.PRMODECONT = r->PRMODECONT; + + PRIM = !m_env.PRMODECONT.AC ? 
(GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM; + + ASSERT(PRIM->PRIM < 7); + + m_context = &m_env.CTXT[PRIM->CTXT]; +} + +void GSState::GIFRegHandlerPRMODE(GIFReg* r) +{ + if(!m_env.PRMODECONT.AC) + { + Flush(); + } + + UINT32 _PRIM = m_env.PRMODE._PRIM; + m_env.PRMODE = r->PRMODE; + m_env.PRMODE._PRIM = _PRIM; + + m_context = &m_env.CTXT[PRIM->CTXT]; +} + +void GSState::GIFRegHandlerTEXCLUT(GIFReg* r) +{ + if(m_env.TEXCLUT.i64 != r->TEXCLUT.i64) + { + Flush(); + } + + m_env.TEXCLUT = r->TEXCLUT; +} + +void GSState::GIFRegHandlerSCANMSK(GIFReg* r) +{ + if(m_env.SCANMSK.i64 != r->SCANMSK.i64) + { + Flush(); + } + + m_env.SCANMSK = r->SCANMSK; +} + +template void GSState::GIFRegHandlerMIPTBP1(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].MIPTBP1.i64 != r->MIPTBP1.i64) + { + Flush(); + } + + m_env.CTXT[i].MIPTBP1 = r->MIPTBP1; +} + +template void GSState::GIFRegHandlerMIPTBP2(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].MIPTBP2.i64 != r->MIPTBP2.i64) + { + Flush(); + } + + m_env.CTXT[i].MIPTBP2 = r->MIPTBP2; +} + +void GSState::GIFRegHandlerTEXA(GIFReg* r) +{ + if(m_env.TEXA.i64 != r->TEXA.i64) + { + Flush(); + } + + m_env.TEXA = r->TEXA; +} + +void GSState::GIFRegHandlerFOGCOL(GIFReg* r) +{ + if(m_env.FOGCOL.i64 != r->FOGCOL.i64) + { + Flush(); + } + + m_env.FOGCOL = r->FOGCOL; +} + +void GSState::GIFRegHandlerTEXFLUSH(GIFReg* r) +{ + // what should we do here? 
+} + +template void GSState::GIFRegHandlerSCISSOR(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].SCISSOR.i64 != r->SCISSOR.i64) + { + Flush(); + } + + m_env.CTXT[i].SCISSOR = r->SCISSOR; + + m_env.CTXT[i].UpdateScissor(); +} + +template void GSState::GIFRegHandlerALPHA(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].ALPHA.i64 != r->ALPHA.i64) + { + Flush(); + } + + m_env.CTXT[i].ALPHA = r->ALPHA; +} + +void GSState::GIFRegHandlerDIMX(GIFReg* r) +{ + if(m_env.DIMX.i64 != r->DIMX.i64) + { + Flush(); + } + + m_env.DIMX = r->DIMX; +} + +void GSState::GIFRegHandlerDTHE(GIFReg* r) +{ + if(m_env.DTHE.i64 != r->DTHE.i64) + { + Flush(); + } + + m_env.DTHE = r->DTHE; +} + +void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r) +{ + if(m_env.COLCLAMP.i64 != r->COLCLAMP.i64) + { + Flush(); + } + + m_env.COLCLAMP = r->COLCLAMP; +} + +template void GSState::GIFRegHandlerTEST(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].TEST.i64 != r->TEST.i64) + { + Flush(); + } + + m_env.CTXT[i].TEST = r->TEST; +} + +void GSState::GIFRegHandlerPABE(GIFReg* r) +{ + if(m_env.PABE.i64 != r->PABE.i64) + { + Flush(); + } + + m_env.PABE = r->PABE; +} + +template void GSState::GIFRegHandlerFBA(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].FBA.i64 != r->FBA.i64) + { + Flush(); + } + + m_env.CTXT[i].FBA = r->FBA; +} + +template void GSState::GIFRegHandlerFRAME(GIFReg* r) +{ + if(PRIM->CTXT == i && m_env.CTXT[i].FRAME.i64 != r->FRAME.i64) + { + Flush(); + } + + m_env.CTXT[i].FRAME = r->FRAME; + + m_env.CTXT[i].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[i].FRAME.PSM]; +} + +template void GSState::GIFRegHandlerZBUF(GIFReg* r) +{ + r->ZBUF.PSM |= 0x30; + + if(PRIM->CTXT == i && m_env.CTXT[i].ZBUF.i64 != r->ZBUF.i64) + { + Flush(); + } + + m_env.CTXT[i].ZBUF = r->ZBUF; + + if(m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ32 + && m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ24 + && m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ16 + && m_env.CTXT[i].ZBUF.PSM != PSM_PSMZ16S) + { + m_env.CTXT[i].ZBUF.PSM = PSM_PSMZ32; + } + + 
m_env.CTXT[i].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[i].ZBUF.PSM]; +} + +void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r) +{ + if(m_env.BITBLTBUF.i64 != r->BITBLTBUF.i64) + { + FlushWrite(); + } + + m_env.BITBLTBUF = r->BITBLTBUF; +} + +void GSState::GIFRegHandlerTRXPOS(GIFReg* r) +{ + if(m_env.TRXPOS.i64 != r->TRXPOS.i64) + { + FlushWrite(); + } + + m_env.TRXPOS = r->TRXPOS; +} + +void GSState::GIFRegHandlerTRXREG(GIFReg* r) +{ + if(m_env.TRXREG.i64 != r->TRXREG.i64 || m_env.TRXREG2.i64 != r->TRXREG.i64) + { + FlushWrite(); + } + + m_env.TRXREG = m_env.TRXREG2 = r->TRXREG; +} + +void GSState::GIFRegHandlerTRXDIR(GIFReg* r) +{ + Flush(); + + m_env.TRXDIR = r->TRXDIR; + + switch(m_env.TRXDIR.XDIR) + { + case 0: // host -> local + m_x = m_env.TRXPOS.DSAX; + m_y = m_env.TRXPOS.DSAY; + m_env.TRXREG.RRW = m_x + m_env.TRXREG2.RRW; + m_env.TRXREG.RRH = m_y + m_env.TRXREG2.RRH; + break; + case 1: // local -> host + m_x = m_env.TRXPOS.SSAX; + m_y = m_env.TRXPOS.SSAY; + m_env.TRXREG.RRW = m_x + m_env.TRXREG2.RRW; + m_env.TRXREG.RRH = m_y + m_env.TRXREG2.RRH; + break; + case 2: // local -> local + Move(); + break; + case 3: + ASSERT(0); + break; + } +} + +void GSState::GIFRegHandlerHWREG(GIFReg* r) +{ + // TODO + + ASSERT(0); +} + +void GSState::GIFRegHandlerSIGNAL(GIFReg* r) +{ + if(m_mt) return; + + SIGLBLID->SIGID = (SIGLBLID->SIGID & ~r->SIGNAL.IDMSK) | (r->SIGNAL.ID & r->SIGNAL.IDMSK); + + if(CSR->wSIGNAL) CSR->rSIGNAL = 1; + if(!IMR->SIGMSK && m_irq) m_irq(); +} + +void GSState::GIFRegHandlerFINISH(GIFReg* r) +{ + if(m_mt) return; + + if(CSR->wFINISH) CSR->rFINISH = 1; + if(!IMR->FINISHMSK && m_irq) m_irq(); +} + +void GSState::GIFRegHandlerLABEL(GIFReg* r) +{ + if(m_mt) return; + + SIGLBLID->LBLID = (SIGLBLID->LBLID & ~r->LABEL.IDMSK) | (r->LABEL.ID & r->LABEL.IDMSK); +} + +// + +void GSState::Flush() +{ + FlushWrite(); + + FlushPrim(); +} + +void GSState::FlushWrite() +{ + FlushWrite(m_buff, m_bytes); + + m_bytes = 0; +} + +void GSState::FlushWrite(BYTE* mem, int len) 
+{ + if(len > 0) + { + int y = m_y; + + GSLocalMemory::SwizzleTexture st = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].st; + + (m_mem.*st)(m_x, m_y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); + + // TODO: m_perfmon.Put(GSPerfMon::Swizzle, len); + + //ASSERT(m_env.TRXREG.RRH >= m_y - y); + + CRect r; + + r.left = m_env.TRXPOS.DSAX; + r.top = y; + r.right = m_env.TRXREG.RRW; + r.bottom = min(m_x == m_env.TRXPOS.DSAX ? m_y : m_y + 1, m_env.TRXREG.RRH); + + InvalidateTexture(m_env.BITBLTBUF, r); + + m_mem.InvalidateCLUT(); + } +} + +// + +void GSState::Write(BYTE* mem, int len) +{ + if(len == 0) return; + + // TODO: hmmmm + + if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP)) + { + FlushPrim(); + } + + int bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].trbpp; + + int pitch = (m_env.TRXREG.RRW - m_env.TRXPOS.DSAX) * bpp >> 3; + + if(pitch <= 0) {ASSERT(0); return;} + + int height = len / pitch; + + if(height > m_env.TRXREG.RRH - m_env.TRXPOS.DSAY) + { + height = m_env.TRXREG.RRH - m_env.TRXPOS.DSAY; + + len = height * pitch; + } + + if(m_bytes > 0 || height < m_env.TRXREG.RRH - m_env.TRXPOS.DSAY) + { + ASSERT(len <= m_maxbytes); // more than 4mb into a 4mb local mem doesn't make sense + + len = min(m_maxbytes, len); + + if(m_bytes + len > m_maxbytes) + { + FlushWrite(); + } + + memcpy(&m_buff[m_bytes], mem, len); + + m_bytes += len; + } + else + { + FlushWrite(mem, len); + } +} + +void GSState::Read(BYTE* mem, int len) +{ + BYTE* pb = (BYTE*)mem; + WORD* pw = (WORD*)mem; + DWORD* pd = (DWORD*)mem; + + if(m_y >= (int)m_env.TRXREG.RRH) {ASSERT(0); return;} + + if(m_x == m_env.TRXPOS.SSAX && m_y == m_env.TRXPOS.SSAY) + { + CRect r(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY, m_env.TRXREG.RRW, m_env.TRXREG.RRH); + + InvalidateLocalMem(m_env.BITBLTBUF, r); + } + + switch(m_env.BITBLTBUF.SPSM) + { + case PSM_PSMCT32: + for(len /= 4; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pd++) + *pd = 
m_mem.readPixel32(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMCT24: + for(len /= 3; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb+=3) + { + DWORD dw = m_mem.readPixel24(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + pb[0] = ((BYTE*)&dw)[0]; pb[1] = ((BYTE*)&dw)[1]; pb[2] = ((BYTE*)&dw)[2]; + } + break; + case PSM_PSMCT16: + for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++) + *pw = (WORD)m_mem.readPixel16(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMCT16S: + for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++) + *pw = (WORD)m_mem.readPixel16S(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMT8: + for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++) + *pb = (BYTE)m_mem.readPixel8(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMT4: + for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++) + *pb = (BYTE)(m_mem.readPixel4(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)&0x0f) + | (BYTE)(m_mem.readPixel4(m_x+1, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)<<4); + break; + case PSM_PSMT8H: + for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++) + *pb = (BYTE)m_mem.readPixel8H(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMT4HL: + for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++) + *pb = (BYTE)(m_mem.readPixel4HL(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)&0x0f) + | (BYTE)(m_mem.readPixel4HL(m_x+1, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)<<4); + break; + case PSM_PSMT4HH: + for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++) + *pb = (BYTE)(m_mem.readPixel4HH(m_x, m_y, 
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)&0x0f) + | (BYTE)(m_mem.readPixel4HH(m_x+1, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)<<4); + break; + case PSM_PSMZ32: + for(len /= 4; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pd++) + *pd = m_mem.readPixel32Z(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMZ24: + for(len /= 3; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb+=3) + { + DWORD dw = m_mem.readPixel24Z(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + pb[0] = ((BYTE*)&dw)[0]; pb[1] = ((BYTE*)&dw)[1]; pb[2] = ((BYTE*)&dw)[2]; + } + break; + case PSM_PSMZ16: + for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++) + *pw = (WORD)m_mem.readPixel16Z(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + case PSM_PSMZ16S: + for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++) + *pw = (WORD)m_mem.readPixel16SZ(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + break; + } +} + +void GSState::Move() +{ + // ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect + // guitar hero copies the far end of the board to do a similar blend too + + GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp; + GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp; + + int sx = m_env.TRXPOS.SSAX; + int dx = m_env.TRXPOS.DSAX; + int sy = m_env.TRXPOS.SSAY; + int dy = m_env.TRXPOS.DSAY; + int w = m_env.TRXREG.RRW; + int h = m_env.TRXREG.RRH; + int xinc = 1; + int yinc = 1; + + if(sx < dx) sx += w-1, dx += w-1, xinc = -1; + if(sy < dy) sy += h-1, dy += h-1, yinc = -1; + + InvalidateLocalMem(m_env.BITBLTBUF, CRect(CPoint(sx, sy), CSize(w, h))); + InvalidateTexture(m_env.BITBLTBUF, CRect(CPoint(dx, dy), CSize(w, h))); + + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) + for(int x = 0; x < w; 
x++, sx += xinc, dx += xinc) + (m_mem.*wp)(dx, dy, (m_mem.*rp)(sx, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW), m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); +} + +void GSState::WriteCSR(UINT32 csr) +{ + CSR->ai32[1] = csr; +} + +void GSState::ReadFIFO(BYTE* mem, int size) +{ + Flush(); + + Read(mem, size * 16); +} + +void GSState::Transfer(BYTE* mem, int size, int index) +{ + GIFPath& path = m_path[index]; + + while(size > 0) + { + bool eop = false; + + if(path.tag.NLOOP == 0) + { + path.tag = *(GIFTag*)mem; + path.nreg = 0; + + mem += sizeof(GIFTag); + size--; + + m_q = 1.0f; + + if(index == 2 && path.tag.EOP) + { + m_path3hack = 1; + } + + if(path.tag.PRE) + { + GIFReg r; + r.i64 = path.tag.PRIM; + (this->*m_fpGIFRegHandlers[GIF_A_D_REG_PRIM])(&r); + } + + if(path.tag.EOP) + { + eop = true; + } + else if(path.tag.NLOOP == 0) + { + if(index == 0 && m_nloophack) + { + continue; + } + + eop = true; + } + } + + switch(path.tag.FLG) + { + case GIF_FLG_PACKED: + + for(GIFPackedReg* r = (GIFPackedReg*)mem; path.tag.NLOOP > 0 && size > 0; r++, size--, mem += sizeof(GIFPackedReg)) + { + (this->*m_fpGIFPackedRegHandlers[path.GetGIFReg()])(r); + + if((path.nreg = (path.nreg + 1) & 0xf) == path.tag.NREG) + { + path.nreg = 0; + path.tag.NLOOP--; + } + } + + break; + + case GIF_FLG_REGLIST: + + size *= 2; + + for(GIFReg* r = (GIFReg*)mem; path.tag.NLOOP > 0 && size > 0; r++, size--, mem += sizeof(GIFReg)) + { + (this->*m_fpGIFRegHandlers[path.GetGIFReg()])(r); + + if((path.nreg = (path.nreg + 1) & 0xf) == path.tag.NREG) + { + path.nreg = 0; + path.tag.NLOOP--; + } + } + + if(size & 1) mem += sizeof(GIFReg); + + size /= 2; + + break; + + case GIF_FLG_IMAGE2: // hmmm + + path.tag.NLOOP = 0; + + break; + + case GIF_FLG_IMAGE: + { + int len = min(size, path.tag.NLOOP); + + //ASSERT(!(len&3)); + + switch(m_env.TRXDIR.XDIR) + { + case 0: + Write(mem, len*16); + break; + case 1: + Read(mem, len*16); // TODO: writing access violation with aqtime + break; + case 2: + Move(); + break; + 
case 3: + ASSERT(0); + break; + default: + __assume(0); + } + + mem += len*16; + path.tag.NLOOP -= len; + size -= len; + } + + break; + + default: + __assume(0); + } + + if(eop && ((int)size <= 0 || index == 0)) + { + break; + } + } + + // FIXME: dq8, pcsx2 error probably + + if(index == 0) + { + if(!path.tag.EOP && path.tag.NLOOP > 0) + { + path.tag.NLOOP = 0; + + TRACE(_T("path1 hack\n")); + } + } +} + +int GSState::Freeze(freezeData* fd, bool sizeonly) +{ + if(sizeonly) + { + fd->size = m_sssize; + return 0; + } + + if(!fd->data || fd->size < m_sssize) + { + return -1; + } + + Flush(); + + BYTE* data = fd->data; + + memcpy(data, &m_version, sizeof(m_version)); data += sizeof(m_version); + memcpy(data, &m_env, sizeof(m_env)); data += sizeof(m_env); + memcpy(data, &m_v, sizeof(m_v)); data += sizeof(m_v); + memcpy(data, &m_x, sizeof(m_x)); data += sizeof(m_x); + memcpy(data, &m_y, sizeof(m_y)); data += sizeof(m_y); + memcpy(data, m_mem.GetVM(), m_vmsize); data += m_vmsize; + memcpy(data, m_path, sizeof(m_path)); data += sizeof(m_path); + memcpy(data, &m_q, sizeof(m_q)); data += sizeof(m_q); + + return 0; +} + +int GSState::Defrost(const freezeData* fd) +{ + if(!fd || !fd->data || fd->size == 0) + { + return -1; + } + + if(fd->size != m_vmsize) + { + return -1; + } + + BYTE* data = fd->data; + + if(*(int*)data != m_version) + { + return -1; + } + + data += sizeof(m_version); + + Flush(); + + memcpy(&m_env, data, sizeof(m_env)); data += sizeof(m_env); + memcpy(&m_v, data, sizeof(m_v)); data += sizeof(m_v); + memcpy(&m_x, data, sizeof(m_x)); data += sizeof(m_x); + memcpy(&m_y, data, sizeof(m_y)); data += sizeof(m_y); + memcpy(m_mem.GetVM(), data, m_vmsize); data += m_vmsize; + memcpy(&m_path, data, sizeof(m_path)); data += sizeof(m_path); + memcpy(&m_q, data, sizeof(m_q)); data += sizeof(m_q); + + PRIM = !m_env.PRMODECONT.AC ? 
(GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM; + + m_context = &m_env.CTXT[PRIM->CTXT]; + + m_env.CTXT[0].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[0].FRAME.PSM]; + m_env.CTXT[0].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[0].ZBUF.PSM]; + m_env.CTXT[0].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[0].TEX0.PSM]; + + m_env.CTXT[1].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[1].FRAME.PSM]; + m_env.CTXT[1].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[1].ZBUF.PSM]; + m_env.CTXT[1].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[1].TEX0.PSM]; + + return 0; +} + diff --git a/gsdx/GSState.h b/gsdx/GSState.h new file mode 100644 index 0000000..0208f81 --- /dev/null +++ b/gsdx/GSState.h @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GS.h" +#include "GSLocalMemory.h" +#include "GSDrawingContext.h" +#include "GSDrawingEnvironment.h" +#include "GSVertex.h" +#include "GSVertexList.h" +#include "GSUtil.h" +#include "GSDirtyRect.h" +#include "GSPerfMon.h" +#include "GSScale.h" + +class GSState +{ + typedef void (GSState::*GIFPackedRegHandler)(GIFPackedReg* r); + + GIFPackedRegHandler m_fpGIFPackedRegHandlers[16]; + + void GIFPackedRegHandlerNull(GIFPackedReg* r); + void GIFPackedRegHandlerPRIM(GIFPackedReg* r); + void GIFPackedRegHandlerRGBA(GIFPackedReg* r); + void GIFPackedRegHandlerSTQ(GIFPackedReg* r); + void GIFPackedRegHandlerUV(GIFPackedReg* r); + void GIFPackedRegHandlerXYZF2(GIFPackedReg* r); + void GIFPackedRegHandlerXYZ2(GIFPackedReg* r); + template void GIFPackedRegHandlerTEX0(GIFPackedReg* r); + template void GIFPackedRegHandlerCLAMP(GIFPackedReg* r); + void GIFPackedRegHandlerFOG(GIFPackedReg* r); + void GIFPackedRegHandlerXYZF3(GIFPackedReg* r); + void GIFPackedRegHandlerXYZ3(GIFPackedReg* r); + void GIFPackedRegHandlerA_D(GIFPackedReg* r); + void GIFPackedRegHandlerNOP(GIFPackedReg* r); + + typedef void (GSState::*GIFRegHandler)(GIFReg* r); + + GIFRegHandler m_fpGIFRegHandlers[256]; + + void GIFRegHandlerNull(GIFReg* r); + void GIFRegHandlerPRIM(GIFReg* r); + void GIFRegHandlerRGBAQ(GIFReg* r); + void GIFRegHandlerST(GIFReg* r); + void GIFRegHandlerUV(GIFReg* r); + void GIFRegHandlerXYZF2(GIFReg* r); + void GIFRegHandlerXYZ2(GIFReg* r); + template void GIFRegHandlerTEX0(GIFReg* r); + template void GIFRegHandlerCLAMP(GIFReg* r); + void GIFRegHandlerFOG(GIFReg* r); + void GIFRegHandlerXYZF3(GIFReg* r); + void GIFRegHandlerXYZ3(GIFReg* r); + void GIFRegHandlerNOP(GIFReg* r); + template void GIFRegHandlerTEX1(GIFReg* r); + template void GIFRegHandlerTEX2(GIFReg* r); + template void GIFRegHandlerXYOFFSET(GIFReg* r); + void GIFRegHandlerPRMODECONT(GIFReg* r); + void GIFRegHandlerPRMODE(GIFReg* r); + void 
GIFRegHandlerTEXCLUT(GIFReg* r); + void GIFRegHandlerSCANMSK(GIFReg* r); + template void GIFRegHandlerMIPTBP1(GIFReg* r); + template void GIFRegHandlerMIPTBP2(GIFReg* r); + void GIFRegHandlerTEXA(GIFReg* r); + void GIFRegHandlerFOGCOL(GIFReg* r); + void GIFRegHandlerTEXFLUSH(GIFReg* r); + template void GIFRegHandlerSCISSOR(GIFReg* r); + template void GIFRegHandlerALPHA(GIFReg* r); + void GIFRegHandlerDIMX(GIFReg* r); + void GIFRegHandlerDTHE(GIFReg* r); + void GIFRegHandlerCOLCLAMP(GIFReg* r); + template void GIFRegHandlerTEST(GIFReg* r); + void GIFRegHandlerPABE(GIFReg* r); + template void GIFRegHandlerFBA(GIFReg* r); + template void GIFRegHandlerFRAME(GIFReg* r); + template void GIFRegHandlerZBUF(GIFReg* r); + void GIFRegHandlerBITBLTBUF(GIFReg* r); + void GIFRegHandlerTRXPOS(GIFReg* r); + void GIFRegHandlerTRXREG(GIFReg* r); + void GIFRegHandlerTRXDIR(GIFReg* r); + void GIFRegHandlerHWREG(GIFReg* r); + void GIFRegHandlerSIGNAL(GIFReg* r); + void GIFRegHandlerFINISH(GIFReg* r); + void GIFRegHandlerLABEL(GIFReg* r); + + int m_version; + int m_vmsize; + int m_sssize; + + bool m_mt; + void (*m_irq)(); + bool m_path3hack; + + int m_x, m_y; + int m_bytes; + int m_maxbytes; + BYTE* m_buff; + + void FlushWrite(); + void FlushWrite(BYTE* mem, int len); + void StepTransfer(int sx, int ex) {if(++m_x == ex) {m_x = sx; m_y++;}} + +public: + GIFRegPRIM* PRIM; + GSRegPMODE* PMODE; + GSRegSMODE1* SMODE1; + GSRegSMODE2* SMODE2; + GSRegDISPFB* DISPFB[2]; + GSRegDISPLAY* DISPLAY[2]; + GSRegEXTBUF* EXTBUF; + GSRegEXTDATA* EXTDATA; + GSRegEXTWRITE* EXTWRITE; + GSRegBGCOLOR* BGCOLOR; + GSRegCSR* CSR; + GSRegIMR* IMR; + GSRegBUSDIR* BUSDIR; + GSRegSIGLBLID* SIGLBLID; + + GIFPath m_path[3]; + GSLocalMemory m_mem; + GSDrawingEnvironment m_env; + GSDrawingContext* m_context; + GSVertex m_v; + float m_q; + + bool m_nloophack; + +public: + GSState(BYTE* base, bool mt, void (*irq)(), bool nloophack); + virtual ~GSState(); + + void ResetHandlers(); + + CPoint GetDisplayPos(int i); + CSize 
GetDisplaySize(int i); + CRect GetDisplayRect(int i); + CSize GetDisplayPos(); + CSize GetDisplaySize(); + CRect GetDisplayRect(); + CPoint GetFramePos(int i); + CSize GetFrameSize(int i); + CRect GetFrameRect(int i); + CSize GetFramePos(); + CSize GetFrameSize(); + CRect GetFrameRect(); + bool IsEnabled(int i); + int GetFPS(); + + virtual void Reset(); + virtual void Flush(); + virtual void FlushPrim() = 0; + virtual void ResetPrim() = 0; + virtual void VertexKick(bool skip) = 0; + virtual void InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) {} + virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) {} + + void Move(); + void Write(BYTE* mem, int len); + void Read(BYTE* mem, int len); + + void WriteCSR(UINT32 csr); + void ReadFIFO(BYTE* mem, int size); + void Transfer(BYTE* mem, int size, int index); + void GetLastTag(UINT32* tag) {*tag = m_path3hack; m_path3hack = 0;} + int Freeze(freezeData* fd, bool sizeonly); + int Defrost(const freezeData* fd); +}; + diff --git a/gsdx/GSTables.cpp b/gsdx/GSTables.cpp new file mode 100644 index 0000000..a73cbec --- /dev/null +++ b/gsdx/GSTables.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSTables.h"
+
+// Constant lookup tables, declared extern in GSTables.h.
+// NOTE(review): the block*/column* names suggest these are memory-layout
+// (swizzle) orderings per pixel format - confirm with the code that indexes them.
+
+// 4x8 table holding each value 0..31 exactly once.
+const DWORD blockTable32[4][8] = {
+	{ 0, 1, 4, 5, 16, 17, 20, 21},
+	{ 2, 3, 6, 7, 18, 19, 22, 23},
+	{ 8, 9, 12, 13, 24, 25, 28, 29},
+	{ 10, 11, 14, 15, 26, 27, 30, 31}
+};
+
+// 4x8 table; presumably the Z-buffer counterpart of blockTable32 - confirm.
+const DWORD blockTable32Z[4][8] = {
+	{ 24, 25, 28, 29, 8, 9, 12, 13},
+	{ 26, 27, 30, 31, 10, 11, 14, 15},
+	{ 16, 17, 20, 21, 0, 1, 4, 5},
+	{ 18, 19, 22, 23, 2, 3, 6, 7}
+};
+
+// 8x4 table holding each value 0..31 exactly once.
+const DWORD blockTable16[8][4] = {
+	{ 0, 2, 8, 10 },
+	{ 1, 3, 9, 11 },
+	{ 4, 6, 12, 14 },
+	{ 5, 7, 13, 15 },
+	{ 16, 18, 24, 26 },
+	{ 17, 19, 25, 27 },
+	{ 20, 22, 28, 30 },
+	{ 21, 23, 29, 31 }
+};
+
+const DWORD blockTable16S[8][4] = {
+	{ 0, 2, 16, 18 },
+	{ 1, 3, 17, 19 },
+	{ 8, 10, 24, 26 },
+	{ 9, 11, 25, 27 },
+	{ 4, 6, 20, 22 },
+	{ 5, 7, 21, 23 },
+	{ 12, 14, 28, 30 },
+	{ 13, 15, 29, 31 }
+};
+
+const DWORD blockTable16Z[8][4] = {
+	{ 24, 26, 16, 18 },
+	{ 25, 27, 17, 19 },
+	{ 28, 30, 20, 22 },
+	{ 29, 31, 21, 23 },
+	{ 8, 10, 0, 2 },
+	{ 9, 11, 1, 3 },
+	{ 12, 14, 4, 6 },
+	{ 13, 15, 5, 7 }
+};
+
+const DWORD blockTable16SZ[8][4] = {
+	{ 24, 26, 8, 10 },
+	{ 25, 27, 9, 11 },
+	{ 16, 18, 0, 2 },
+	{ 17, 19, 1, 3 },
+	{ 28, 30, 12, 14 },
+	{ 29, 31, 13, 15 },
+	{ 20, 22, 4, 6 },
+	{ 21, 23, 5, 7 }
+};
+
+// Same contents as blockTable32.
+const DWORD blockTable8[4][8] = {
+	{ 0, 1, 4, 5, 16, 17, 20, 21},
+	{ 2, 3, 6, 7, 18, 19, 22, 23},
+	{ 8, 9, 12, 13, 24, 25, 28, 29},
+	{ 10, 11, 14, 15, 26, 27, 30, 31}
+};
+
+// Same contents as blockTable16.
+const DWORD blockTable4[8][4] = {
+	{ 0, 2, 8, 10 },
+	{ 1, 3, 9, 11 },
+	{ 4, 6, 12, 14 },
+	{ 5, 7, 13, 15 },
+	{ 16, 18, 24, 26 },
+	{ 17, 19, 25, 27 },
+	{ 20, 22, 28, 30 },
+	{ 21, 23, 29, 31 }
+};
+
+// 8x8 table, values 0..63.
+const DWORD columnTable32[8][8] = {
+	{ 0, 1, 4, 5, 8, 9, 12, 13 },
+	{ 2, 3, 6, 7, 10, 11, 14, 15 },
+	{ 16, 17, 20, 21, 24, 25, 28, 29 },
+	{ 18, 19, 22, 23, 26, 27, 30, 31 },
+	{ 32, 33, 36, 37, 40, 41, 44, 45 },
+	{ 34, 35, 38, 39, 42, 43, 46, 47 },
+	{ 48, 49, 52, 53, 56, 57, 60, 61 },
+	{ 50, 51, 54, 55, 58, 59, 62, 63 },
+};
+
+// 8x16 table, values 0..127.
+const DWORD columnTable16[8][16] = {
+	{ 0, 2, 8, 10, 16, 18, 24, 26,
+	1, 3, 9, 11, 17, 19, 25, 27 },
+	{ 4, 6, 12, 14, 20, 22, 28, 30,
+	5, 7, 13, 15, 21, 23, 29, 31 },
+	{ 32, 34, 40, 42, 48, 50, 56, 58,
+	33, 35, 41, 43, 49, 51, 57, 59 },
+	{ 36, 38, 44, 46, 52, 54, 60, 62,
+	37, 39, 45, 47, 53, 55, 61, 63 },
+	{ 64, 66, 72, 74, 80, 82, 88, 90,
+	65, 67, 73, 75, 81, 83, 89, 91 },
+	{ 68, 70, 76, 78, 84, 86, 92, 94,
+	69, 71, 77, 79, 85, 87, 93, 95 },
+	{ 96, 98, 104, 106, 112, 114, 120, 122,
+	97, 99, 105, 107, 113, 115, 121, 123 },
+	{ 100, 102, 108, 110, 116, 118, 124, 126,
+	101, 103, 109, 111, 117, 119, 125, 127 },
+};
+
+// 16x16 table, values 0..255; four rows per "column" group as marked below.
+const DWORD columnTable8[16][16] = {
+	{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
+	2, 6, 18, 22, 34, 38, 50, 54 },
+	{ 8, 12, 24, 28, 40, 44, 56, 60,
+	10, 14, 26, 30, 42, 46, 58, 62 },
+	{ 33, 37, 49, 53, 1, 5, 17, 21,
+	35, 39, 51, 55, 3, 7, 19, 23 },
+	{ 41, 45, 57, 61, 9, 13, 25, 29,
+	43, 47, 59, 63, 11, 15, 27, 31 },
+	{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
+	98, 102, 114, 118, 66, 70, 82, 86 },
+	{ 104, 108, 120, 124, 72, 76, 88, 92,
+	106, 110, 122, 126, 74, 78, 90, 94 },
+	{ 65, 69, 81, 85, 97, 101, 113, 117,
+	67, 71, 83, 87, 99, 103, 115, 119 },
+	{ 73, 77, 89, 93, 105, 109, 121, 125,
+	75, 79, 91, 95, 107, 111, 123, 127 },
+	{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
+	130, 134, 146, 150, 162, 166, 178, 182 },
+	{ 136, 140, 152, 156, 168, 172, 184, 188,
+	138, 142, 154, 158, 170, 174, 186, 190 },
+	{ 161, 165, 177, 181, 129, 133, 145, 149,
+	163, 167, 179, 183, 131, 135, 147, 151 },
+	{ 169, 173, 185, 189, 137, 141, 153, 157,
+	171, 175, 187, 191, 139, 143, 155, 159 },
+	{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
+	226, 230, 242, 246, 194, 198, 210, 214 },
+	{ 232, 236, 248, 252, 200, 204, 216, 220,
+	234, 238, 250, 254, 202, 206, 218, 222 },
+	{ 193, 197, 209, 213, 225, 229, 241, 245,
+	195, 199, 211, 215, 227, 231, 243, 247 },
+	{ 201, 205, 217, 221, 233, 237, 249, 253,
+	203, 207, 219, 223, 235, 239, 251, 255 },
+};
+
+// 16x32 table, values 0..511; four rows per "column" group as marked below.
+const DWORD columnTable4[16][32] = {
+	{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
+	2, 10, 34, 42, 66, 74, 98, 106,
+	4, 12, 36, 44, 68, 76, 100, 108,
+	6, 14, 38, 46, 70, 78, 102, 110 },
+	{ 16, 24, 48, 56, 80, 88, 112, 120,
+	18, 26, 50, 58, 82, 90, 114, 122,
+	20, 28, 52, 60, 84, 92, 116, 124,
+	22, 30, 54, 62, 86, 94, 118, 126 },
+	{ 65, 73, 97, 105, 1, 9, 33, 41,
+	67, 75, 99, 107, 3, 11, 35, 43,
+	69, 77, 101, 109, 5, 13, 37, 45,
+	71, 79, 103, 111, 7, 15, 39, 47 },
+	{ 81, 89, 113, 121, 17, 25, 49, 57,
+	83, 91, 115, 123, 19, 27, 51, 59,
+	85, 93, 117, 125, 21, 29, 53, 61,
+	87, 95, 119, 127, 23, 31, 55, 63 },
+	{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
+	194, 202, 226, 234, 130, 138, 162, 170,
+	196, 204, 228, 236, 132, 140, 164, 172,
+	198, 206, 230, 238, 134, 142, 166, 174 },
+	{ 208, 216, 240, 248, 144, 152, 176, 184,
+	210, 218, 242, 250, 146, 154, 178, 186,
+	212, 220, 244, 252, 148, 156, 180, 188,
+	214, 222, 246, 254, 150, 158, 182, 190 },
+	{ 129, 137, 161, 169, 193, 201, 225, 233,
+	131, 139, 163, 171, 195, 203, 227, 235,
+	133, 141, 165, 173, 197, 205, 229, 237,
+	135, 143, 167, 175, 199, 207, 231, 239 },
+	{ 145, 153, 177, 185, 209, 217, 241, 249,
+	147, 155, 179, 187, 211, 219, 243, 251,
+	149, 157, 181, 189, 213, 221, 245, 253,
+	151, 159, 183, 191, 215, 223, 247, 255 },
+	{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
+	258, 266, 290, 298, 322, 330, 354, 362,
+	260, 268, 292, 300, 324, 332, 356, 364,
+	262, 270, 294, 302, 326, 334, 358, 366 },
+	{ 272, 280, 304, 312, 336, 344, 368, 376,
+	274, 282, 306, 314, 338, 346, 370, 378,
+	276, 284, 308, 316, 340, 348, 372, 380,
+	278, 286, 310, 318, 342, 350, 374, 382 },
+	{ 321, 329, 353, 361, 257, 265, 289, 297,
+	323, 331, 355, 363, 259, 267, 291, 299,
+	325, 333, 357, 365, 261, 269, 293, 301,
+	327, 335, 359, 367, 263, 271, 295, 303 },
+	{ 337, 345, 369, 377, 273, 281, 305, 313,
+	339, 347, 371, 379, 275, 283, 307, 315,
+	341, 349, 373, 381, 277, 285, 309, 317,
+	343, 351, 375, 383, 279, 287, 311, 319 },
+	{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
+	450, 458, 482, 490, 386, 394, 418, 426,
+	452, 460, 484, 492, 388, 396, 420, 428,
+	454, 462, 486, 494, 390, 398, 422, 430 },
+	{ 464, 472, 496, 504, 400, 408, 432, 440,
+	466, 474, 498, 506, 402, 410, 434, 442,
+	468, 476, 500, 508, 404, 412, 436, 444,
+	470, 478, 502, 510, 406, 414, 438, 446 },
+	{ 385, 393, 417, 425, 449, 457, 481, 489,
+	387, 395, 419, 427, 451, 459, 483, 491,
+	389, 397, 421, 429, 453, 461, 485, 493,
+	391, 399, 423, 431, 455, 463, 487, 495 },
+	{ 401, 409, 433, 441, 465, 473, 497, 505,
+	403, 411, 435, 443, 467, 475, 499, 507,
+	405, 413, 437, 445, 469, 477, 501, 509,
+	407, 415, 439, 447, 471, 479, 503, 511 },
+};
+
+// Eight small per-entry counts; presumably the number of vertices each of the
+// eight primitive types consumes - confirm against the code that indexes it.
+const int primVertexCount[8] =
+{
+	1, 2, 2, 3, 3, 3, 2, 1
+};
\ No newline at end of file
diff --git a/gsdx/GSTables.h b/gsdx/GSTables.h
new file mode 100644
index 0000000..45f323e
--- /dev/null
+++ b/gsdx/GSTables.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2007 Gabest
+ * http://www.gabest.org
+ *
+ * This Program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This Program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+// Declarations of the constant lookup tables defined in GSTables.cpp.
+// The array bounds given here must stay in sync with the definitions.
+extern const DWORD blockTable32[4][8];
+extern const DWORD blockTable32Z[4][8];
+extern const DWORD blockTable16[8][4];
+extern const DWORD blockTable16S[8][4];
+extern const DWORD blockTable16Z[8][4];
+extern const DWORD blockTable16SZ[8][4];
+extern const DWORD blockTable8[4][8];
+extern const DWORD blockTable4[8][4];
+extern const DWORD columnTable32[8][8];
+extern const DWORD columnTable16[8][16];
+extern const DWORD columnTable8[16][16];
+extern const DWORD columnTable4[16][32];
+extern const int primVertexCount[8];
diff --git a/gsdx/GSUtil.cpp b/gsdx/GSUtil.cpp
new file mode 100644
index 0000000..90c0017
--- /dev/null
+++ b/gsdx/GSUtil.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2007 Gabest
+ * http://www.gabest.org
+ *
+ * This Program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This Program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GS.h" + +bool HasSharedBits(DWORD spsm, DWORD dpsm) +{ + switch(spsm) + { + case PSM_PSMCT32: + case PSM_PSMCT16: + case PSM_PSMCT16S: + case PSM_PSMT8: + case PSM_PSMT4: + case PSM_PSMZ32: + case PSM_PSMZ16: + case PSM_PSMZ16S: + return true; + case PSM_PSMCT24: + case PSM_PSMZ24: + return !(dpsm == PSM_PSMT8H || dpsm == PSM_PSMT4HL || dpsm == PSM_PSMT4HH); + case PSM_PSMT8H: + return !(dpsm == PSM_PSMCT24 || dpsm == PSM_PSMZ24); + case PSM_PSMT4HL: + return !(dpsm == PSM_PSMCT24 || dpsm == PSM_PSMZ24 || dpsm == PSM_PSMT4HH); + case PSM_PSMT4HH: + return !(dpsm == PSM_PSMCT24 || dpsm == PSM_PSMZ24 || dpsm == PSM_PSMT4HL); + } + + return true; +} + +bool HasSharedBits(DWORD sbp, DWORD spsm, DWORD dbp, DWORD dpsm) +{ + if(sbp != dbp) return false; + + return HasSharedBits(spsm, dpsm); +} + +bool HasCompatibleBits(DWORD spsm, DWORD dpsm) +{ + if(spsm == dpsm) return true; + + switch(spsm) + { + case PSM_PSMCT32: + case PSM_PSMCT24: + return dpsm == PSM_PSMCT32 || dpsm == PSM_PSMCT24; + case PSM_PSMCT16: + case PSM_PSMCT16S: + return dpsm == PSM_PSMCT16 || dpsm == PSM_PSMCT16S; + case PSM_PSMZ32: + case PSM_PSMZ24: + return dpsm == PSM_PSMZ32 || dpsm == PSM_PSMZ24; + case PSM_PSMZ16: + case PSM_PSMZ16S: + return dpsm == PSM_PSMZ16 || dpsm == PSM_PSMZ16S; + } + + return false; +} + +bool IsRectInRect(const CRect& inner, const CRect& outer) +{ + return outer.left <= inner.left && inner.right <= outer.right && outer.top <= inner.top && inner.bottom <= outer.bottom; +} + +bool IsRectInRectH(const CRect& inner, const CRect& outer) +{ + return outer.top <= inner.top && inner.bottom <= outer.bottom; +} + +bool IsRectInRectV(const CRect& inner, const CRect& outer) +{ + return outer.left <= inner.left && inner.right <= outer.right; +} diff --git a/gsdx/GSUtil.h b/gsdx/GSUtil.h new file mode 100644 index 0000000..70bb983 --- /dev/null +++ b/gsdx/GSUtil.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 
2007 Gabest
+ * http://www.gabest.org
+ *
+ * This Program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This Program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+// Pixel-format comparison helpers (spsm/dpsm are source/destination format codes).
+extern bool HasSharedBits(DWORD spsm, DWORD dpsm);
+extern bool HasSharedBits(DWORD sbp, DWORD spsm, DWORD dbp, DWORD dpsm);
+extern bool HasCompatibleBits(DWORD spsm, DWORD dpsm);
+// Rectangle containment helpers: both axes, top/bottom only (H), left/right only (V).
+extern bool IsRectInRect(const CRect& inner, const CRect& outer);
+extern bool IsRectInRectH(const CRect& inner, const CRect& outer);
+extern bool IsRectInRectV(const CRect& inner, const CRect& outer);
diff --git a/gsdx/GSVertex.h b/gsdx/GSVertex.h
new file mode 100644
index 0000000..36c33c9
--- /dev/null
+++ b/gsdx/GSVertex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2007 Gabest
+ * http://www.gabest.org
+ *
+ * This Program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This Program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GS.h" + +#pragma pack(push, 1) + +struct GSVertex +{ + GIFRegRGBAQ RGBAQ; + GIFRegST ST; + GIFRegUV UV; + GIFRegXYZ XYZ; + GIFRegFOG FOG; + + GSVertex() {memset(this, 0, sizeof(*this));} +}; + +#pragma pack(pop) diff --git a/gsdx/GSVertexList.cpp b/gsdx/GSVertexList.cpp new file mode 100644 index 0000000..d39b008 --- /dev/null +++ b/gsdx/GSVertexList.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GSVertexList.h" \ No newline at end of file diff --git a/gsdx/GSVertexList.h b/gsdx/GSVertexList.h new file mode 100644 index 0000000..12abd38 --- /dev/null +++ b/gsdx/GSVertexList.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +template class GSVertexList +{ + Vertex* m_v; + int m_head; + int m_tail; + int m_count; + +public: + GSVertexList() + { + m_v = (Vertex*)_aligned_malloc(sizeof(Vertex)*4, 16); + + RemoveAll(); + } + + virtual ~GSVertexList() + { + _aligned_free(m_v); + } + + void RemoveAll() + { + m_head = m_tail = m_count = 0; + } + + Vertex& AddTail() + { + ASSERT(m_count < 4); + + Vertex& v = m_v[m_tail]; + m_tail = (m_tail+1)&3; + m_count++; + return v; + } + + void AddTail(Vertex& v) + { + ASSERT(m_count < 4); + + m_v[m_tail] = v; + m_tail = (m_tail+1)&3; + m_count++; + } + + void RemoveAt(int i, Vertex& v) + { + GetAt(i, v); + + i = (m_head+i)&3; + if(i == m_head) m_head = (m_head+1)&3; + else for(m_tail = (m_tail+4-1)&3; i != m_tail; i = (i+1)&3) m_v[i] = m_v[(i+1)&3]; + m_count--; + } + + void GetAt(int i, Vertex& v) + { + ASSERT(m_count > 0); + + v = m_v[(m_head+i)&3]; + } + + int GetCount() + { + return m_count; + } +}; diff --git a/gsdx/GSdx_vs2005.vcproj b/gsdx/GSdx_vs2005.vcproj new file mode 100644 index 0000000..ac5742e --- /dev/null +++ b/gsdx/GSdx_vs2005.vcprojdiff --git a/gsdx/GSdx_vs2008.vcproj b/gsdx/GSdx_vs2008.vcproj new file mode 100644 index 0000000..d4f692a --- /dev/null +++ b/gsdx/GSdx_vs2008.vcprojdiff --git a/gsdx/stdafx.cpp b/gsdx/stdafx.cpp new file mode 100644 index 0000000..3832007 --- /dev/null +++ b/gsdx/stdafx.cpp @@ -0,0 +1,8 @@ +// stdafx.cpp : source file that includes just the standard includes +// GSdx.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional headers you need in STDAFX.H +// and not in this file diff --git a/gsdx/stdafx.h b/gsdx/stdafx.h new file mode 100644 index 0000000..899908a --- /dev/null +++ b/gsdx/stdafx.h @@ -0,0 +1,54 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but 
+// are changed infrequently
+//
+
+#pragma once
+
+// C4996 is MSVC's "declared deprecated" warning; it is silenced for the whole project.
+#pragma warning(disable: 4996)
+
+#ifndef VC_EXTRALEAN
+#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
+#endif
+
+// Modify the following defines if you have to target a platform prior to the ones specified below.
+// Refer to MSDN for the latest info on corresponding values for different platforms.
+// NOTE(review): 0x0510 is not one of the documented WINVER values (0x0400,
+// 0x0500, 0x0501, 0x0502, 0x0600, ...); it looks like a typo for 0x0501, and it
+// is also higher than _WIN32_WINNT below - confirm before changing.
+#ifndef WINVER // Allow use of features specific to Windows 95 and Windows NT 4 or later.
+#define WINVER 0x0510 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later.
+#endif
+
+#ifndef _WIN32_WINNT // Allow use of features specific to Windows NT 4 or later.
+#define _WIN32_WINNT 0x0400 // Change this to the appropriate value to target Windows 2000 or later.
+#endif
+
+#ifndef _WIN32_WINDOWS // Allow use of features specific to Windows 98 or later.
+#define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later.
+#endif
+
+#ifndef _WIN32_IE // Allow use of features specific to IE 4.0 or later.
+#define _WIN32_IE 0x0400 // Change this to the appropriate value to target IE 5.0 or later.
+#endif + +#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be explicit + +#include // MFC core and standard components +//#include // MFC extensions +//#include +#include +#include +#include +#include +#include + +#define countof(a) (sizeof(a)/sizeof(a[0])) + +#ifndef RESTRICT + #ifdef __INTEL_COMPILER + #define RESTRICT restrict + #elif _MSC_VER >= 1400 + #define RESTRICT __restrict + #else + #define RESTRICT + #endif +#endif + diff --git a/gsdx/x86-32.asm b/gsdx/x86-32.asm new file mode 100644 index 0000000..4e945c4 --- /dev/null +++ b/gsdx/x86-32.asm @@ -0,0 +1,1335 @@ + + .686 + .model flat + .mmx + .xmm + + .const + + __uvmin DD 0d01502f9r ; -1e+010 + __uvmax DD 0501502f9r ; +1e+010 + + .code + +; +; memsetd +; + +@memsetd@12 proc public + + push edi + + mov edi, ecx + mov eax, edx + mov ecx, [esp+4+4] + cld + rep stosd + + pop edi + + ret 4 + +@memsetd@12 endp + +; +; SaturateColor +; + +@SaturateColor_sse2@4 proc public + + pxor xmm0, xmm0 + movdqa xmm1, [ecx] + packssdw xmm1, xmm0 + packuswb xmm1, xmm0 + punpcklbw xmm1, xmm0 + punpcklwd xmm1, xmm0 + movdqa [ecx], xmm1 + + ret + +@SaturateColor_sse2@4 endp + +@SaturateColor_asm@4 proc public + + push esi + + mov esi, ecx + + xor eax, eax + mov edx, 000000ffh + + mov ecx, [esi] + cmp ecx, eax + cmovl ecx, eax + cmp ecx, edx + cmovg ecx, edx + mov [esi], ecx + + mov ecx, [esi+4] + cmp ecx, eax + cmovl ecx, eax + cmp ecx, edx + cmovg ecx, edx + mov [esi+4], ecx + + mov ecx, [esi+8] + cmp ecx, eax + cmovl ecx, eax + cmp ecx, edx + cmovg ecx, edx + mov [esi+8], ecx + + mov ecx, [esi+12] + cmp ecx, eax + cmovl ecx, eax + cmp ecx, edx + cmovg ecx, edx + mov [esi+12], ecx + + pop esi + + ret + +@SaturateColor_asm@4 endp + +; +; swizzling +; + +punpck macro op, sd0, sd2, s1, s3, d1, d3 + + movdqa @CatStr(xmm, %d1), @CatStr(xmm, %sd0) + pshufd @CatStr(xmm, %d3), @CatStr(xmm, %sd2), 0e4h + + @CatStr(punpckl, op) @CatStr(xmm, %sd0), @CatStr(xmm, %s1) + @CatStr(punpckh, op) @CatStr(xmm, %d1), 
@CatStr(xmm, %s1) + @CatStr(punpckl, op) @CatStr(xmm, %sd2), @CatStr(xmm, %s3) + @CatStr(punpckh, op) @CatStr(xmm, %d3), @CatStr(xmm, %s3) + + endm + +punpcknb macro + + movdqa xmm4, xmm0 + pshufd xmm5, xmm1, 0e4h + + psllq xmm1, 4 + psrlq xmm4, 4 + + movdqa xmm6, xmm7 + pand xmm0, xmm7 + pandn xmm6, xmm1 + por xmm0, xmm6 + + movdqa xmm6, xmm7 + pand xmm4, xmm7 + pandn xmm6, xmm5 + por xmm4, xmm6 + + movdqa xmm1, xmm4 + + movdqa xmm4, xmm2 + pshufd xmm5, xmm3, 0e4h + + psllq xmm3, 4 + psrlq xmm4, 4 + + movdqa xmm6, xmm7 + pand xmm2, xmm7 + pandn xmm6, xmm3 + por xmm2, xmm6 + + movdqa xmm6, xmm7 + pand xmm4, xmm7 + pandn xmm6, xmm5 + por xmm4, xmm6 + + movdqa xmm3, xmm4 + + punpck bw, 0, 2, 1, 3, 4, 6 + + endm + +; +; unSwizzleBlock32 +; + +@unSwizzleBlock32_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + lea eax, [ebx*2] + add eax, ebx + + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm2, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+ebx], xmm4 + movdqa [edx+ebx+16], xmm6 + + movdqa xmm0, [ecx+16*4] + movdqa xmm1, [ecx+16*5] + movdqa xmm2, [ecx+16*6] + movdqa xmm3, [ecx+16*7] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + movdqa [edx+ebx*2], xmm0 + movdqa [edx+ebx*2+16], xmm2 + movdqa [edx+eax], xmm4 + movdqa [edx+eax+16], xmm6 + + lea edx, [edx+ebx*4] + + movdqa xmm0, [ecx+16*8] + movdqa xmm1, [ecx+16*9] + movdqa xmm2, [ecx+16*10] + movdqa xmm3, [ecx+16*11] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+ebx], xmm4 + movdqa [edx+ebx+16], xmm6 + + movdqa xmm0, [ecx+16*12] + movdqa xmm1, [ecx+16*13] + movdqa xmm2, [ecx+16*14] + movdqa xmm3, [ecx+16*15] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + movdqa [edx+ebx*2], xmm0 + movdqa [edx+ebx*2+16], xmm2 + movdqa [edx+eax], xmm4 + movdqa [edx+eax+16], xmm6 + + pop ebx + + ret 4 + +@unSwizzleBlock32_sse2@12 endp + +; +; unSwizzleBlock16 +; + +@unSwizzleBlock16_sse2@12 proc public + + 
push ebx + + mov ebx, [esp+4+4] + mov eax, 4 + + align 16 +@@: + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm2, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck wd, 0, 2, 1, 3, 4, 6 + punpck dq, 0, 4, 2, 6, 1, 3 + punpck wd, 0, 4, 1, 3, 2, 6 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+ebx], xmm4 + movdqa [edx+ebx+16], xmm6 + + add ecx, 64 + lea edx, [edx+ebx*2] + + dec eax + jnz @B + + pop ebx + + ret 4 + +@unSwizzleBlock16_sse2@12 endp + +; +; unSwizzleBlock8 +; + +@unSwizzleBlock8_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 2 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm4, [ecx+16*2] + movdqa xmm5, [ecx+16*3] + + punpck bw, 0, 4, 1, 5, 2, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 2, 4, 6, 1, 3 + + pshufd xmm1, xmm1, 0b1h + pshufd xmm3, xmm3, 0b1h + + movdqa [edx], xmm0 + movdqa [edx+ebx], xmm2 + lea edx, [edx+ebx*2] + + movdqa [edx], xmm1 + movdqa [edx+ebx], xmm3 + lea edx, [edx+ebx*2] + + ; col 1, 3 + + movdqa xmm0, [ecx+16*4] + movdqa xmm1, [ecx+16*5] + movdqa xmm4, [ecx+16*6] + movdqa xmm5, [ecx+16*7] + + punpck bw, 0, 4, 1, 5, 2, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 2, 4, 6, 1, 3 + + pshufd xmm0, xmm0, 0b1h + pshufd xmm2, xmm2, 0b1h + + movdqa [edx], xmm0 + movdqa [edx+ebx], xmm2 + lea edx, [edx+ebx*2] + + movdqa [edx], xmm1 + movdqa [edx+ebx], xmm3 + lea edx, [edx+ebx*2] + + add ecx, 128 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@unSwizzleBlock8_sse2@12 endp + +; +; unSwizzleBlock4 +; + +@unSwizzleBlock4_sse2@12 proc public + + push ebx + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + mov ebx, [esp+4+4] + mov eax, 2 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm4, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck dq, 0, 4, 1, 3, 2, 6 + punpck dq, 0, 2, 4, 6, 1, 3 + punpcknb + punpck bw, 0, 2, 4, 6, 1, 3 
+ punpck wd, 0, 2, 1, 3, 4, 6 + + pshufd xmm0, xmm0, 0d8h + pshufd xmm2, xmm2, 0d8h + pshufd xmm4, xmm4, 0d8h + pshufd xmm6, xmm6, 0d8h + + punpck qdq, 0, 2, 4, 6, 1, 3 + + pshuflw xmm1, xmm1, 0b1h + pshuflw xmm3, xmm3, 0b1h + pshufhw xmm1, xmm1, 0b1h + pshufhw xmm3, xmm3, 0b1h + + movdqa [edx], xmm0 + movdqa [edx+ebx], xmm2 + lea edx, [edx+ebx*2] + + movdqa [edx], xmm1 + movdqa [edx+ebx], xmm3 + lea edx, [edx+ebx*2] + + ; col 1, 3 + + movdqa xmm0, [ecx+16*4] + movdqa xmm1, [ecx+16*5] + movdqa xmm4, [ecx+16*6] + movdqa xmm3, [ecx+16*7] + + punpck dq, 0, 4, 1, 3, 2, 6 + punpck dq, 0, 2, 4, 6, 1, 3 + punpcknb + punpck bw, 0, 2, 4, 6, 1, 3 + punpck wd, 0, 2, 1, 3, 4, 6 + + pshufd xmm0, xmm0, 0d8h + pshufd xmm2, xmm2, 0d8h + pshufd xmm4, xmm4, 0d8h + pshufd xmm6, xmm6, 0d8h + + punpck qdq, 0, 2, 4, 6, 1, 3 + + pshuflw xmm0, xmm0, 0b1h + pshuflw xmm2, xmm2, 0b1h + pshufhw xmm0, xmm0, 0b1h + pshufhw xmm2, xmm2, 0b1h + + movdqa [edx], xmm0 + movdqa [edx+ebx], xmm2 + lea edx, [edx+ebx*2] + + movdqa [edx], xmm1 + movdqa [edx+ebx], xmm3 + lea edx, [edx+ebx*2] + + add ecx, 128 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@unSwizzleBlock4_sse2@12 endp + +; +; unSwizzleBlock8HP +; + +@unSwizzleBlock8HP_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 4 + + align 16 +@@: + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm2, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + psrld xmm0, 24 + psrld xmm2, 24 + psrld xmm4, 24 + psrld xmm6, 24 + + packssdw xmm0, xmm2 + packssdw xmm4, xmm6 + packuswb xmm0, xmm4 + + movlps qword ptr [edx], xmm0 + movhps qword ptr [edx+ebx], xmm0 + + add ecx, 64 + lea edx, [edx+ebx*2] + + dec eax + jnz @B + + pop ebx + + ret 4 + +@unSwizzleBlock8HP_sse2@12 endp + +; +; unSwizzleBlock4HLP +; + +@unSwizzleBlock4HLP_sse2@12 proc public + + push ebx + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + mov ebx, [esp+4+4] + mov eax, 4 + + align 16 +@@: + movdqa xmm0, [ecx+16*0] + movdqa 
xmm1, [ecx+16*1] + movdqa xmm2, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + psrld xmm0, 24 + psrld xmm2, 24 + psrld xmm4, 24 + psrld xmm6, 24 + + packssdw xmm0, xmm2 + packssdw xmm4, xmm6 + packuswb xmm0, xmm4 + + pand xmm0, xmm7 + + movlps qword ptr [edx], xmm0 + movhps qword ptr [edx+ebx], xmm0 + + add ecx, 64 + lea edx, [edx+ebx*2] + + dec eax + jnz @B + + pop ebx + + ret 4 + +@unSwizzleBlock4HLP_sse2@12 endp + +; +; unSwizzleBlock4HHP +; + +@unSwizzleBlock4HHP_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 4 + + align 16 +@@: + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm2, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + psrld xmm0, 28 + psrld xmm2, 28 + psrld xmm4, 28 + psrld xmm6, 28 + + packssdw xmm0, xmm2 + packssdw xmm4, xmm6 + packuswb xmm0, xmm4 + + movlps qword ptr [edx], xmm0 + movhps qword ptr [edx+ebx], xmm0 + + add ecx, 64 + lea edx, [edx+ebx*2] + + dec eax + jnz @B + + pop ebx + + ret 4 + +@unSwizzleBlock4HHP_sse2@12 endp + +; +; unSwizzleBlock4P +; + +@unSwizzleBlock4P_sse2@12 proc public + + push esi + push edi + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + mov esi, [esp+4+8] + lea edi, [esi*2] + add edi, esi + + ; col 0 + + movdqa xmm0, [ecx+16*0] + movdqa xmm1, [ecx+16*1] + movdqa xmm2, [ecx+16*2] + movdqa xmm3, [ecx+16*3] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm7 + pandn xmm1, xmm0 + pand xmm0, xmm7 + pshufd xmm1, xmm1, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm2 + pand xmm2, xmm7 + pshufd xmm3, xmm3, 0b1h + psrlq xmm3, 4 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+esi*2], xmm1 + movdqa [edx+esi*2+16], xmm3 + + movdqa xmm1, xmm7 + pandn xmm1, xmm4 + pand xmm4, xmm7 + pshufd xmm1, xmm1, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm6 + pand xmm6, xmm7 + pshufd xmm3, xmm3, 0b1h + psrlq xmm3, 4 + + movdqa [edx+esi], 
xmm4 + movdqa [edx+esi+16], xmm6 + movdqa [edx+edi], xmm1 + movdqa [edx+edi+16], xmm3 + + lea edx, [edx+esi*4] + + ; col 1 + + movdqa xmm0, [ecx+16*4] + movdqa xmm1, [ecx+16*5] + movdqa xmm2, [ecx+16*6] + movdqa xmm3, [ecx+16*7] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm7 + pandn xmm1, xmm0 + pand xmm0, xmm7 + pshufd xmm0, xmm0, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm2 + pand xmm2, xmm7 + pshufd xmm2, xmm2, 0b1h + psrlq xmm3, 4 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+esi*2], xmm1 + movdqa [edx+esi*2+16], xmm3 + + movdqa xmm1, xmm7 + pandn xmm1, xmm4 + pand xmm4, xmm7 + pshufd xmm4, xmm4, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm6 + pand xmm6, xmm7 + pshufd xmm6, xmm6, 0b1h + psrlq xmm3, 4 + + movdqa [edx+esi], xmm4 + movdqa [edx+esi+16], xmm6 + movdqa [edx+edi], xmm1 + movdqa [edx+edi+16], xmm3 + + lea edx, [edx+esi*4] + + ; col 2 + + movdqa xmm0, [ecx+16*8] + movdqa xmm1, [ecx+16*9] + movdqa xmm2, [ecx+16*10] + movdqa xmm3, [ecx+16*11] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm7 + pandn xmm1, xmm0 + pand xmm0, xmm7 + pshufd xmm1, xmm1, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm2 + pand xmm2, xmm7 + pshufd xmm3, xmm3, 0b1h + psrlq xmm3, 4 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+esi*2], xmm1 + movdqa [edx+esi*2+16], xmm3 + + movdqa xmm1, xmm7 + pandn xmm1, xmm4 + pand xmm4, xmm7 + pshufd xmm1, xmm1, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm6 + pand xmm6, xmm7 + pshufd xmm3, xmm3, 0b1h + psrlq xmm3, 4 + + movdqa [edx+esi], xmm4 + movdqa [edx+esi+16], xmm6 + movdqa [edx+edi], xmm1 + movdqa [edx+edi+16], xmm3 + + lea edx, [edx+esi*4] + + ; col 3 + + movdqa xmm0, [ecx+16*12] + movdqa xmm1, [ecx+16*13] + movdqa xmm2, [ecx+16*14] + movdqa xmm3, [ecx+16*15] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 
+ punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm7 + pandn xmm1, xmm0 + pand xmm0, xmm7 + pshufd xmm0, xmm0, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm2 + pand xmm2, xmm7 + pshufd xmm2, xmm2, 0b1h + psrlq xmm3, 4 + + movdqa [edx], xmm0 + movdqa [edx+16], xmm2 + movdqa [edx+esi*2], xmm1 + movdqa [edx+esi*2+16], xmm3 + + movdqa xmm1, xmm7 + pandn xmm1, xmm4 + pand xmm4, xmm7 + pshufd xmm4, xmm4, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm7 + pandn xmm3, xmm6 + pand xmm6, xmm7 + pshufd xmm6, xmm6, 0b1h + psrlq xmm3, 4 + + movdqa [edx+esi], xmm4 + movdqa [edx+esi+16], xmm6 + movdqa [edx+edi], xmm1 + movdqa [edx+edi+16], xmm3 + + ; lea edx, [edx+esi*4] + + pop edi + pop esi + + ret 4 + +@unSwizzleBlock4P_sse2@12 endp + +; +; swizzling +; + +; +; SwizzleBlock32 +; + +@SwizzleBlock32_sse2@16 proc public + + + push esi + push edi + + mov edi, ecx + mov esi, edx + mov edx, [esp+4+8] + mov ecx, 4 + + mov eax, [esp+8+8] + cmp eax, 0ffffffffh + jnz SwizzleBlock32_sse2@WM + + align 16 +@@: + movdqa xmm0, [esi] + movdqa xmm4, [esi+16] + movdqa xmm1, [esi+edx] + movdqa xmm5, [esi+edx+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa [edi+16*0], xmm0 + movdqa [edi+16*1], xmm2 + movdqa [edi+16*2], xmm4 + movdqa [edi+16*3], xmm6 + + lea esi, [esi+edx*2] + add edi, 64 + + dec ecx + jnz @B + + pop edi + pop esi + + ret 8 + +SwizzleBlock32_sse2@WM: + + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + align 16 +@@: + movdqa xmm0, [esi] + movdqa xmm4, [esi+16] + movdqa xmm1, [esi+edx] + movdqa xmm5, [esi+edx+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa xmm3, xmm7 + pshufd xmm5, xmm7, 0e4h + + pandn xmm3, [edi+16*0] + pand xmm0, xmm7 + por xmm0, xmm3 + movdqa [edi+16*0], xmm0 + + pandn xmm5, [edi+16*1] + pand xmm2, xmm7 + por xmm2, xmm5 + movdqa [edi+16*1], xmm2 + + movdqa xmm3, xmm7 + pshufd xmm5, xmm7, 0e4h + + pandn xmm3, [edi+16*2] + pand xmm4, xmm7 + por xmm4, xmm3 + movdqa [edi+16*2], xmm4 + + pandn xmm5, [edi+16*3] + pand xmm6, xmm7 + por xmm6, xmm5 + movdqa [edi+16*3], 
xmm6 + + lea esi, [esi+edx*2] + add edi, 64 + + dec ecx + jnz @B + + pop edi + pop esi + + ret 8 + +@SwizzleBlock32_sse2@16 endp + +; +; SwizzleBlock16 +; + +@SwizzleBlock16_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 4 + + align 16 +@@: + movdqa xmm0, [edx] + movdqa xmm1, [edx+16] + movdqa xmm2, [edx+ebx] + movdqa xmm3, [edx+ebx+16] + + punpck wd, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 5 + + movdqa [ecx+16*0], xmm0 + movdqa [ecx+16*1], xmm1 + movdqa [ecx+16*2], xmm4 + movdqa [ecx+16*3], xmm5 + + lea edx, [edx+ebx*2] + add ecx, 64 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@SwizzleBlock16_sse2@12 endp + +; +; SwizzleBlock8 +; + +@SwizzleBlock8_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 2 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [edx] + movdqa xmm2, [edx+ebx] + lea edx, [edx+ebx*2] + + pshufd xmm1, [edx], 0b1h + pshufd xmm3, [edx+ebx], 0b1h + lea edx, [edx+ebx*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [ecx+16*0], xmm0 + movdqa [ecx+16*1], xmm4 + movdqa [ecx+16*2], xmm1 + movdqa [ecx+16*3], xmm5 + + ; col 1, 3 + + pshufd xmm0, [edx], 0b1h + pshufd xmm2, [edx+ebx], 0b1h + lea edx, [edx+ebx*2] + + movdqa xmm1, [edx] + movdqa xmm3, [edx+ebx] + lea edx, [edx+ebx*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [ecx+16*4], xmm0 + movdqa [ecx+16*5], xmm4 + movdqa [ecx+16*6], xmm1 + movdqa [ecx+16*7], xmm5 + + add ecx, 128 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@SwizzleBlock8_sse2@12 endp + +; +; SwizzleBlock4 +; + +@SwizzleBlock4_sse2@12 proc public + + push ebx + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + mov ebx, [esp+4+4] + mov eax, 2 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [edx] + movdqa xmm2, [edx+ebx] + lea edx, [edx+ebx*2] + + movdqa xmm1, [edx] + movdqa xmm3, [edx+ebx] + lea edx, [edx+ebx*2] + + pshuflw xmm1, xmm1, 0b1h + pshuflw xmm3, xmm3, 
0b1h + pshufhw xmm1, xmm1, 0b1h + pshufhw xmm3, xmm3, 0b1h + + punpcknb + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [ecx+16*0], xmm0 + movdqa [ecx+16*1], xmm1 + movdqa [ecx+16*2], xmm4 + movdqa [ecx+16*3], xmm3 + + ; col 1, 3 + + movdqa xmm0, [edx] + movdqa xmm2, [edx+ebx] + lea edx, [edx+ebx*2] + + movdqa xmm1, [edx] + movdqa xmm3, [edx+ebx] + lea edx, [edx+ebx*2] + + pshuflw xmm0, xmm0, 0b1h + pshuflw xmm2, xmm2, 0b1h + pshufhw xmm0, xmm0, 0b1h + pshufhw xmm2, xmm2, 0b1h + + punpcknb + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [ecx+16*4], xmm0 + movdqa [ecx+16*5], xmm1 + movdqa [ecx+16*6], xmm4 + movdqa [ecx+16*7], xmm3 + + add ecx, 128 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@SwizzleBlock4_sse2@12 endp + +; +; swizzling with unaligned reads +; + +; +; SwizzleBlock32u +; + +@SwizzleBlock32u_sse2@16 proc public + + push esi + push edi + + mov edi, ecx + mov esi, edx + mov edx, [esp+4+8] + mov ecx, 4 + + mov eax, [esp+8+8] + cmp eax, 0ffffffffh + jnz SwizzleBlock32u_sse2@WM + + align 16 +@@: + movdqu xmm0, [esi] + movdqu xmm4, [esi+16] + movdqu xmm1, [esi+edx] + movdqu xmm5, [esi+edx+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa [edi+16*0], xmm0 + movdqa [edi+16*1], xmm2 + movdqa [edi+16*2], xmm4 + movdqa [edi+16*3], xmm6 + + lea esi, [esi+edx*2] + add edi, 64 + + dec ecx + jnz @B + + pop edi + pop esi + + ret 8 + +SwizzleBlock32u_sse2@WM: + + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + align 16 +@@: + movdqu xmm0, [esi] + movdqu xmm4, [esi+16] + movdqu xmm1, [esi+edx] + movdqu xmm5, [esi+edx+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa xmm3, xmm7 + pshufd xmm5, xmm7, 0e4h + + pandn xmm3, [edi+16*0] + pand xmm0, xmm7 + por xmm0, xmm3 + movdqa [edi+16*0], xmm0 + + pandn xmm5, [edi+16*1] + pand xmm2, xmm7 + por xmm2, xmm5 + movdqa [edi+16*1], xmm2 + + movdqa xmm3, xmm7 + pshufd xmm5, xmm7, 0e4h + + pandn xmm3, [edi+16*2] + pand xmm4, xmm7 + por 
xmm4, xmm3 + movdqa [edi+16*2], xmm4 + + pandn xmm5, [edi+16*3] + pand xmm6, xmm7 + por xmm6, xmm5 + movdqa [edi+16*3], xmm6 + + lea esi, [esi+edx*2] + add edi, 64 + + dec ecx + jnz @B + + pop edi + pop esi + + ret 8 + +@SwizzleBlock32u_sse2@16 endp + +; +; SwizzleBlock16u +; + +@SwizzleBlock16u_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 4 + + align 16 +@@: + movdqu xmm0, [edx] + movdqu xmm1, [edx+16] + movdqu xmm2, [edx+ebx] + movdqu xmm3, [edx+ebx+16] + + punpck wd, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 5 + + movdqa [ecx+16*0], xmm0 + movdqa [ecx+16*1], xmm1 + movdqa [ecx+16*2], xmm4 + movdqa [ecx+16*3], xmm5 + + lea edx, [edx+ebx*2] + add ecx, 64 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@SwizzleBlock16u_sse2@12 endp + +; +; SwizzleBlock8u +; + +@SwizzleBlock8u_sse2@12 proc public + + push ebx + + mov ebx, [esp+4+4] + mov eax, 2 + + align 16 +@@: + ; col 0, 2 + + movdqu xmm0, [edx] + movdqu xmm2, [edx+ebx] + lea edx, [edx+ebx*2] + + movdqu xmm1, [edx] + movdqu xmm3, [edx+ebx] + pshufd xmm1, xmm1, 0b1h + pshufd xmm3, xmm3, 0b1h + lea edx, [edx+ebx*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [ecx+16*0], xmm0 + movdqa [ecx+16*1], xmm4 + movdqa [ecx+16*2], xmm1 + movdqa [ecx+16*3], xmm5 + + ; col 1, 3 + + movdqu xmm0, [edx] + movdqu xmm2, [edx+ebx] + pshufd xmm0, xmm0, 0b1h + pshufd xmm2, xmm2, 0b1h + lea edx, [edx+ebx*2] + + movdqu xmm1, [edx] + movdqu xmm3, [edx+ebx] + lea edx, [edx+ebx*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [ecx+16*4], xmm0 + movdqa [ecx+16*5], xmm4 + movdqa [ecx+16*6], xmm1 + movdqa [ecx+16*7], xmm5 + + add ecx, 128 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@SwizzleBlock8u_sse2@12 endp + +; +; SwizzleBlock4u +; + +@SwizzleBlock4u_sse2@12 proc public + + push ebx + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + mov ebx, [esp+4+4] + mov eax, 2 + + align 16 +@@: 
+ ; col 0, 2 + + movdqu xmm0, [edx] + movdqu xmm2, [edx+ebx] + lea edx, [edx+ebx*2] + + movdqu xmm1, [edx] + movdqu xmm3, [edx+ebx] + lea edx, [edx+ebx*2] + + pshuflw xmm1, xmm1, 0b1h + pshuflw xmm3, xmm3, 0b1h + pshufhw xmm1, xmm1, 0b1h + pshufhw xmm3, xmm3, 0b1h + + punpcknb + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [ecx+16*0], xmm0 + movdqa [ecx+16*1], xmm1 + movdqa [ecx+16*2], xmm4 + movdqa [ecx+16*3], xmm3 + + ; col 1, 3 + + movdqu xmm0, [edx] + movdqu xmm2, [edx+ebx] + lea edx, [edx+ebx*2] + + movdqu xmm1, [edx] + movdqu xmm3, [edx+ebx] + lea edx, [edx+ebx*2] + + pshuflw xmm0, xmm0, 0b1h + pshuflw xmm2, xmm2, 0b1h + pshufhw xmm0, xmm0, 0b1h + pshufhw xmm2, xmm2, 0b1h + + punpcknb + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [ecx+16*4], xmm0 + movdqa [ecx+16*5], xmm1 + movdqa [ecx+16*6], xmm4 + movdqa [ecx+16*7], xmm3 + + add ecx, 128 + + dec eax + jnz @B + + pop ebx + + ret 4 + +@SwizzleBlock4u_sse2@12 endp + + end \ No newline at end of file diff --git a/gsdx/x86-64.asm b/gsdx/x86-64.asm new file mode 100644 index 0000000..70e6d94 --- /dev/null +++ b/gsdx/x86-64.asm @@ -0,0 +1,1422 @@ + + .const + + __uvmin DD 0d01502f9r ; -1e+010 + __uvmax DD 0501502f9r ; +1e+010 + + .code + +; +; memsetd +; + +memsetd proc public + + push rdi + + mov rdi, rcx + mov eax, edx + mov rcx, r8 + cld + rep stosd + + pop rdi + + ret + +memsetd endp + +; +; SaturateColor +; + +SaturateColor_amd64 proc public + + pxor xmm0, xmm0 + movdqa xmm1, [rcx] + packssdw xmm1, xmm0 + packuswb xmm1, xmm0 + punpcklbw xmm1, xmm0 + punpcklwd xmm1, xmm0 + movdqa [rcx], xmm1 + + ret + +SaturateColor_amd64 endp + +; +; swizzling +; + +punpck macro op, sd0, sd2, s1, s3, d1, d3 + + movdqa @CatStr(xmm, %d1), @CatStr(xmm, %sd0) + pshufd @CatStr(xmm, %d3), @CatStr(xmm, %sd2), 0e4h + + @CatStr(punpckl, op) @CatStr(xmm, %sd0), @CatStr(xmm, %s1) + @CatStr(punpckh, op) @CatStr(xmm, %d1), 
@CatStr(xmm, %s1) + @CatStr(punpckl, op) @CatStr(xmm, %sd2), @CatStr(xmm, %s3) + @CatStr(punpckh, op) @CatStr(xmm, %d3), @CatStr(xmm, %s3) + + endm + +punpck2 macro op, sd0, sd2, sd4, sd6, s1, s3, s5, s7, d1, d3, d5, d7 + + movdqa @CatStr(xmm, %d1), @CatStr(xmm, %sd0) + pshufd @CatStr(xmm, %d3), @CatStr(xmm, %sd2), 0e4h + movdqa @CatStr(xmm, %d5), @CatStr(xmm, %sd4) + pshufd @CatStr(xmm, %d7), @CatStr(xmm, %sd6), 0e4h + + @CatStr(punpckl, op) @CatStr(xmm, %sd0), @CatStr(xmm, %s1) + @CatStr(punpckh, op) @CatStr(xmm, %d1), @CatStr(xmm, %s1) + @CatStr(punpckl, op) @CatStr(xmm, %sd2), @CatStr(xmm, %s3) + @CatStr(punpckh, op) @CatStr(xmm, %d3), @CatStr(xmm, %s3) + @CatStr(punpckl, op) @CatStr(xmm, %sd4), @CatStr(xmm, %s5) + @CatStr(punpckh, op) @CatStr(xmm, %d5), @CatStr(xmm, %s5) + @CatStr(punpckl, op) @CatStr(xmm, %sd6), @CatStr(xmm, %s7) + @CatStr(punpckh, op) @CatStr(xmm, %d7), @CatStr(xmm, %s7) + + endm + +punpcknbl macro + + movdqa xmm4, xmm0 + pshufd xmm5, xmm1, 0e4h + + psllq xmm1, 4 + psrlq xmm4, 4 + + movdqa xmm6, xmm7 + pand xmm0, xmm7 + pandn xmm6, xmm1 + por xmm0, xmm6 + + movdqa xmm6, xmm7 + pand xmm4, xmm7 + pandn xmm6, xmm5 + por xmm4, xmm6 + + movdqa xmm1, xmm4 + + movdqa xmm4, xmm2 + pshufd xmm5, xmm3, 0e4h + + psllq xmm3, 4 + psrlq xmm4, 4 + + movdqa xmm6, xmm7 + pand xmm2, xmm7 + pandn xmm6, xmm3 + por xmm2, xmm6 + + movdqa xmm6, xmm7 + pand xmm4, xmm7 + pandn xmm6, xmm5 + por xmm4, xmm6 + + movdqa xmm3, xmm4 + + punpck bw, 0, 2, 1, 3, 4, 6 + + endm + +punpcknbh macro + + movdqa xmm12, xmm8 + pshufd xmm13, xmm9, 0e4h + + psllq xmm9, 4 + psrlq xmm12, 4 + + movdqa xmm14, xmm15 + pand xmm8, xmm15 + pandn xmm14, xmm9 + por xmm8, xmm14 + + movdqa xmm14, xmm15 + pand xmm12, xmm15 + pandn xmm14, xmm13 + por xmm12, xmm14 + + movdqa xmm9, xmm12 + + movdqa xmm12, xmm10 + pshufd xmm13, xmm11, 0e4h + + psllq xmm11, 4 + psrlq xmm12, 4 + + movdqa xmm14, xmm15 + pand xmm10, xmm15 + pandn xmm14, xmm11 + por xmm10, xmm14 + + movdqa xmm14, xmm15 + pand xmm12, xmm15 + 
pandn xmm14, xmm13 + por xmm12, xmm14 + + movdqa xmm11, xmm12 + + punpck bw, 8, 10, 9, 11, 12, 14 + + endm + +; +; unSwizzleBlock32 +; + +unSwizzleBlock32_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 4 + + align 16 +@@: + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm2, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + movdqa [rdi], xmm0 + movdqa [rdi+16], xmm2 + movdqa [rdi+r8], xmm4 + movdqa [rdi+r8+16], xmm6 + + add rsi, 64 + lea rdi, [rdi+r8*2] + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock32_amd64 endp + +; +; unSwizzleBlock32_2 (TODO: test me) +; + +unSwizzleBlock32_2_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 2 + + align 16 +@@: + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm2, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + movdqa xmm4, [rsi+16*4] + movdqa xmm5, [rsi+16*5] + movdqa xmm6, [rsi+16*6] + movdqa xmm7, [rsi+16*7] + + punpck2 qdq, 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14 + + movdqa [rdi], xmm0 + movdqa [rdi+16], xmm2 + movdqa [rdi+r8], xmm4 + movdqa [rdi+r8+16], xmm6 + lea rdi, [rdi+r8*2] + + movdqa [rdi], xmm8 + movdqa [rdi+16], xmm10 + movdqa [rdi+r8], xmm12 + movdqa [rdi+r8+16], xmm14 + lea rdi, [rdi+r8*2] + + add rsi, 128 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock32_2_amd64 endp + +; +; unSwizzleBlock16 +; + +unSwizzleBlock16_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 4 + + align 16 +@@: + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm2, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + + punpck wd, 0, 2, 1, 3, 4, 6 + punpck dq, 0, 4, 2, 6, 1, 3 + punpck wd, 0, 4, 1, 3, 2, 6 + + movdqa [rdi], xmm0 + movdqa [rdi+16], xmm2 + movdqa [rdi+r8], xmm4 + movdqa [rdi+r8+16], xmm6 + + add rsi, 64 + lea rdi, [rdi+r8*2] + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock16_amd64 endp + +; +; unSwizzleBlock8 +; + 
+unSwizzleBlock8_amd64 proc public ; rcx = src (copied to rsi), rdx = dst (copied to rdi), r8 = byte step between destination rows. NOTE(review): xmm6 and xmm8-xmm14 are overwritten without being saved, although the Microsoft x64 convention lists xmm6-xmm15 as callee-saved - confirm callers tolerate this + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 2 ; two iterations, each consuming 128 source bytes and storing eight 16-byte rows + + ; r9 = r8*3 + lea r9, [r8*2] + add r9, r8 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm4, [rsi+16*2] + movdqa xmm5, [rsi+16*3] + + ; col 1, 3 + + movdqa xmm8, [rsi+16*4] + movdqa xmm9, [rsi+16*5] + movdqa xmm12, [rsi+16*6] + movdqa xmm13, [rsi+16*7] + + ; col 0, 2 + + punpck bw, 0, 4, 1, 5, 2, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 2, 4, 6, 1, 3 + + pshufd xmm1, xmm1, 0b1h + pshufd xmm3, xmm3, 0b1h + + ; col 1, 3 + + punpck bw, 8, 12, 9, 13, 10, 14 + punpck wd, 8, 10, 12, 14, 9, 11 + punpck bw, 8, 10, 9, 11, 12, 14 + punpck qdq, 8, 10, 12, 14, 9, 11 + + pshufd xmm8, xmm8, 0b1h + pshufd xmm10, xmm10, 0b1h + + ; col 0, 2 + + movdqa [rdi], xmm0 + movdqa [rdi+r8], xmm2 + movdqa [rdi+r8*2], xmm1 + movdqa [rdi+r9], xmm3 + lea rdi, [rdi+r8*4] + + ; col 1, 3 + + movdqa [rdi], xmm8 + movdqa [rdi+r8], xmm10 + movdqa [rdi+r8*2], xmm9 + movdqa [rdi+r9], xmm11 + lea rdi, [rdi+r8*4] + + add rsi, 128 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock8_amd64 endp + +; +; unSwizzleBlock4: rcx = src (copied to rsi), rdx = dst (copied to rdi), r8 = byte step between destination rows +; xmm7 and xmm15 both hold the byte value 0fh replicated across the whole register + +unSwizzleBlock4_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 2 + + ; r9 = r8*3 + lea r9, [r8*2] + add r9, r8 + + mov eax, 0f0f0f0fh + movd xmm7, rax + pshufd xmm7, xmm7, 0 + movdqa xmm15, xmm7 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm4, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + + ; col 1, 3 + + movdqa xmm8, [rsi+16*4] + movdqa xmm9, [rsi+16*5] + movdqa xmm12, [rsi+16*6] + movdqa xmm11, [rsi+16*7] + + ; col 0, 2 + + punpck dq, 0, 4, 1, 3, 2, 6 + punpck dq, 0, 2, 4, 6, 1, 3 + punpcknbl + punpck bw, 0, 2, 4, 6, 1, 3 + punpck wd, 0, 2, 1, 3, 4, 6 + + ; col 1, 3 + + punpck dq, 8, 12, 9, 11, 10, 14 + punpck dq, 8, 10, 12, 14, 9, 11 + punpcknbh + punpck bw, 8, 10, 12, 14, 9, 11 + punpck wd, 8, 
10, 9, 11, 12, 14 + + ; col 0, 2 + + pshufd xmm0, xmm0, 0d8h + pshufd xmm2, xmm2, 0d8h + pshufd xmm4, xmm4, 0d8h + pshufd xmm6, xmm6, 0d8h + + ; col 1, 3 + + pshufd xmm8, xmm8, 0d8h + pshufd xmm10, xmm10, 0d8h + pshufd xmm12, xmm12, 0d8h + pshufd xmm14, xmm14, 0d8h + + ; col 0, 2 + + punpck qdq, 0, 2, 4, 6, 1, 3 + + ; col 1, 3 + + punpck qdq, 8, 10, 12, 14, 9, 11 + + ; col 0, 2 + + pshuflw xmm1, xmm1, 0b1h + pshuflw xmm3, xmm3, 0b1h + pshufhw xmm1, xmm1, 0b1h + pshufhw xmm3, xmm3, 0b1h + + ; col 1, 3 + + pshuflw xmm8, xmm8, 0b1h + pshuflw xmm10, xmm10, 0b1h + pshufhw xmm8, xmm8, 0b1h + pshufhw xmm10, xmm10, 0b1h + + ; col 0, 2 + + movdqa [rdi], xmm0 + movdqa [rdi+r8], xmm2 + movdqa [rdi+r8*2], xmm1 + movdqa [rdi+r9], xmm3 + lea rdi, [rdi+r8*4] + + ; col 1, 3 + + movdqa [rdi], xmm8 + movdqa [rdi+r8], xmm10 + movdqa [rdi+r8*2], xmm9 + movdqa [rdi+r9], xmm11 + lea rdi, [rdi+r8*4] + + add rsi, 128 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock4_amd64 endp + +; +; unSwizzleBlock8HP +; + +unSwizzleBlock8HP_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 4 + + align 16 +@@: + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm2, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + psrld xmm0, 24 + psrld xmm2, 24 + psrld xmm4, 24 + psrld xmm6, 24 + + packssdw xmm0, xmm2 + packssdw xmm4, xmm6 + packuswb xmm0, xmm4 + + movlps qword ptr [rdi], xmm0 + movhps qword ptr [rdi+r8], xmm0 + + add rsi, 64 + lea rdi, [rdi+r8*2] + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock8HP_amd64 endp + +; +; unSwizzleBlock4HLP +; + +unSwizzleBlock4HLP_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 4 + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + align 16 +@@: + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm2, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + psrld xmm0, 24 + psrld xmm2, 24 
+ psrld xmm4, 24 + psrld xmm6, 24 + + packssdw xmm0, xmm2 + packssdw xmm4, xmm6 + packuswb xmm0, xmm4 + + pand xmm0, xmm7 + + movlps qword ptr [rdi], xmm0 + movhps qword ptr [rdi+r8], xmm0 + + add rsi, 64 + lea rdi, [rdi+r8*2] + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock4HLP_amd64 endp + +; +; unSwizzleBlock4HHP +; + +unSwizzleBlock4HHP_amd64 proc public + + push rsi + push rdi + + mov rsi, rcx + mov rdi, rdx + mov rcx, 4 + + align 16 +@@: + movdqa xmm0, [rsi+16*0] + movdqa xmm1, [rsi+16*1] + movdqa xmm2, [rsi+16*2] + movdqa xmm3, [rsi+16*3] + + punpck qdq, 0, 2, 1, 3, 4, 6 + + psrld xmm0, 28 + psrld xmm2, 28 + psrld xmm4, 28 + psrld xmm6, 28 + + packssdw xmm0, xmm2 + packssdw xmm4, xmm6 + packuswb xmm0, xmm4 + + movlps qword ptr [rdi], xmm0 + movhps qword ptr [rdi+r8], xmm0 + + add rsi, 64 + lea rdi, [rdi+r8*2] + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +unSwizzleBlock4HHP_amd64 endp + +; +; unSwizzleBlock4P +; + +unSwizzleBlock4P_amd64 proc public + + mov eax, 0f0f0f0fh + movd xmm8, eax + pshufd xmm8, xmm8, 0 + + ; r9 = r8*3 + lea r9, [r8*2] + add r9, r8 + + ; col 0 + + movdqa xmm0, [rcx+16*0] + movdqa xmm1, [rcx+16*1] + movdqa xmm2, [rcx+16*2] + movdqa xmm3, [rcx+16*3] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm8 + pandn xmm1, xmm0 + pand xmm0, xmm8 + pshufd xmm1, xmm1, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm8 + pandn xmm3, xmm2 + pand xmm2, xmm8 + pshufd xmm3, xmm3, 0b1h + psrlq xmm3, 4 + + movdqa xmm5, xmm8 + pandn xmm5, xmm4 + pand xmm4, xmm8 + pshufd xmm5, xmm5, 0b1h + psrlq xmm5, 4 + + movdqa xmm7, xmm8 + pandn xmm7, xmm6 + pand xmm6, xmm8 + pshufd xmm7, xmm7, 0b1h + psrlq xmm7, 4 + + movdqa [rdx], xmm0 + movdqa [rdx+16], xmm2 + movdqa [rdx+r8], xmm4 + movdqa [rdx+r8+16], xmm6 + + movdqa [rdx+r8*2], xmm1 + movdqa [rdx+r8*2+16], xmm3 + movdqa [rdx+r9], xmm5 + movdqa [rdx+r9+16], xmm7 + + lea rdx, [rdx+r8*4] + + ; col 1 + + movdqa xmm0, [rcx+16*4] + movdqa 
xmm1, [rcx+16*5] + movdqa xmm2, [rcx+16*6] + movdqa xmm3, [rcx+16*7] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm8 + pandn xmm1, xmm0 + pand xmm0, xmm8 + pshufd xmm0, xmm0, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm8 + pandn xmm3, xmm2 + pand xmm2, xmm8 + pshufd xmm2, xmm2, 0b1h + psrlq xmm3, 4 + + movdqa xmm5, xmm8 + pandn xmm5, xmm4 + pand xmm4, xmm8 + pshufd xmm4, xmm4, 0b1h + psrlq xmm5, 4 + + movdqa xmm7, xmm8 + pandn xmm7, xmm6 + pand xmm6, xmm8 + pshufd xmm6, xmm6, 0b1h + psrlq xmm7, 4 + + movdqa [rdx], xmm0 + movdqa [rdx+16], xmm2 + movdqa [rdx+r8], xmm4 + movdqa [rdx+r8+16], xmm6 + + movdqa [rdx+r8*2], xmm1 + movdqa [rdx+r8*2+16], xmm3 + movdqa [rdx+r9], xmm5 + movdqa [rdx+r9+16], xmm7 + + lea rdx, [rdx+r8*4] + + ; col 2 + + movdqa xmm0, [rcx+16*8] + movdqa xmm1, [rcx+16*9] + movdqa xmm2, [rcx+16*10] + movdqa xmm3, [rcx+16*11] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm8 + pandn xmm1, xmm0 + pand xmm0, xmm8 + pshufd xmm1, xmm1, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm8 + pandn xmm3, xmm2 + pand xmm2, xmm8 + pshufd xmm3, xmm3, 0b1h + psrlq xmm3, 4 + + movdqa xmm5, xmm8 + pandn xmm5, xmm4 + pand xmm4, xmm8 + pshufd xmm5, xmm5, 0b1h + psrlq xmm5, 4 + + movdqa xmm7, xmm8 + pandn xmm7, xmm6 + pand xmm6, xmm8 + pshufd xmm7, xmm7, 0b1h + psrlq xmm7, 4 + + movdqa [rdx], xmm0 + movdqa [rdx+16], xmm2 + movdqa [rdx+r8], xmm4 + movdqa [rdx+r8+16], xmm6 + + movdqa [rdx+r8*2], xmm1 + movdqa [rdx+r8*2+16], xmm3 + movdqa [rdx+r9], xmm5 + movdqa [rdx+r9+16], xmm7 + + lea rdx, [rdx+r8*4] + + ; col 3 + + movdqa xmm0, [rcx+16*12] + movdqa xmm1, [rcx+16*13] + movdqa xmm2, [rcx+16*14] + movdqa xmm3, [rcx+16*15] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 4, 2, 6, 1, 3 + punpck bw, 0, 4, 1, 3, 2, 6 + + movdqa xmm1, xmm8 + pandn xmm1, xmm0 + pand xmm0, xmm8 + pshufd xmm0, xmm0, 0b1h + psrlq xmm1, 4 + + movdqa xmm3, xmm8 + pandn xmm3, xmm2 + 
pand xmm2, xmm8 + pshufd xmm2, xmm2, 0b1h + psrlq xmm3, 4 + + movdqa xmm5, xmm8 + pandn xmm5, xmm4 + pand xmm4, xmm8 + pshufd xmm4, xmm4, 0b1h + psrlq xmm5, 4 + + movdqa xmm7, xmm8 + pandn xmm7, xmm6 + pand xmm6, xmm8 + pshufd xmm6, xmm6, 0b1h + psrlq xmm7, 4 + + movdqa [rdx], xmm0 + movdqa [rdx+16], xmm2 + movdqa [rdx+r8], xmm4 + movdqa [rdx+r8+16], xmm6 + + movdqa [rdx+r8*2], xmm1 + movdqa [rdx+r8*2+16], xmm3 + movdqa [rdx+r9], xmm5 + movdqa [rdx+r9+16], xmm7 + + ; lea rdx, [rdx+r8*4] + + ret + +unSwizzleBlock4P_amd64 endp + +; +; swizzling +; + +; +; SwizzleBlock32_amd64: rcx = dst, rdx = src, r8 = byte step between source rows, r9d = write mask +; 4 iterations, each reading two 32-byte source rows and writing 64 bytes to dst; a mask other than 0ffffffffh takes the @WM path, which merges the masked source bits into the existing dst contents. NOTE(review): xmm6 (and on the @WM path xmm7, xmm9, xmm11) is overwritten without being saved, although the Microsoft x64 convention lists xmm6-xmm15 as callee-saved - confirm callers tolerate this + +SwizzleBlock32_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov rcx, 4 + + cmp r9d, 0ffffffffh + jnz SwizzleBlock32_amd64@WM + + align 16 +@@: + movdqa xmm0, [rsi] + movdqa xmm4, [rsi+16] + movdqa xmm1, [rsi+r8] + movdqa xmm5, [rsi+r8+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm2 + movdqa [rdi+16*2], xmm4 + movdqa [rdi+16*3], xmm6 + + lea rsi, [rsi+r8*2] + add rdi, 64 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock32_amd64@WM: + + movd xmm7, r9d + pshufd xmm7, xmm7, 0 ; replicate the 32-bit mask into all four dwords + + align 16 +@@: + movdqa xmm0, [rsi] + movdqa xmm4, [rsi+16] + movdqa xmm1, [rsi+r8] + movdqa xmm5, [rsi+r8+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa xmm3, xmm7 + pshufd xmm5, xmm7, 0e4h + movdqa xmm9, xmm7 + pshufd xmm11, xmm7, 0e4h + + pandn xmm3, [rdi+16*0] + pand xmm0, xmm7 + por xmm0, xmm3 + movdqa [rdi+16*0], xmm0 + + pandn xmm5, [rdi+16*1] + pand xmm2, xmm7 + por xmm2, xmm5 + movdqa [rdi+16*1], xmm2 + + pandn xmm9, [rdi+16*2] + pand xmm4, xmm7 + por xmm4, xmm9 + movdqa [rdi+16*2], xmm4 + + pandn xmm11, [rdi+16*3] + pand xmm6, xmm7 + por xmm6, xmm11 + movdqa [rdi+16*3], xmm6 ; store through the full 64-bit rdi like the three stores above; an edi-based address drops the upper 32 pointer bits + + lea rsi, [rsi+r8*2] + add rdi, 64 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock32_amd64 endp + +; +; SwizzleBlock16_amd64 +; + +SwizzleBlock16_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov 
rcx, 4 + + align 16 +@@: + movdqa xmm0, [rsi] + movdqa xmm1, [rsi+16] + movdqa xmm2, [rsi+r8] + movdqa xmm3, [rsi+r8+16] + + punpck wd, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 5 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm1 + movdqa [rdi+16*2], xmm4 + movdqa [rdi+16*3], xmm5 + + lea rsi, [rsi+r8*2] + add rdi, 64 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock16_amd64 endp + +; +; SwizzleBlock8: rcx = dst, rdx = src, r8 = byte step between source rows +; Two iterations, each reading eight 16-byte source rows and writing 128 bytes to dst. NOTE(review): xmm6 is overwritten without being saved, although the Microsoft x64 convention lists xmm6-xmm15 as callee-saved - confirm callers tolerate this + +SwizzleBlock8_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov ecx, 2 ; the 32-bit write zero-extends, so rcx = 2 for the dec rcx below + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [rsi] + movdqa xmm2, [rsi+r8] + lea rsi, [rsi+r8*2] + + pshufd xmm1, [rsi], 0b1h + pshufd xmm3, [rsi+r8], 0b1h + lea rsi, [rsi+r8*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm4 + movdqa [rdi+16*2], xmm1 + movdqa [rdi+16*3], xmm5 + + ; col 1, 3 + + pshufd xmm0, [rsi], 0b1h + pshufd xmm2, [rsi+r8], 0b1h + lea rsi, [rsi+r8*2] + + movdqa xmm1, [rsi] + movdqa xmm3, [rsi+r8] + lea rsi, [rsi+r8*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [rdi+16*4], xmm0 + movdqa [rdi+16*5], xmm4 + movdqa [rdi+16*6], xmm1 + movdqa [rdi+16*7], xmm5 + + add rdi, 128 ; 64-bit add: writing edi would zero the upper half of rdi and break the second iteration for dst at or above 4 GiB + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock8_amd64 endp + +; +; SwizzleBlock4 +; + +SwizzleBlock4_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov rcx, 2 + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + align 16 +@@: + ; col 0, 2 + + movdqa xmm0, [rsi] + movdqa xmm2, [rsi+r8] + lea rsi, [rsi+r8*2] + + movdqa xmm1, [rsi] + movdqa xmm3, [rsi+r8] + lea rsi, [rsi+r8*2] + + pshuflw xmm1, xmm1, 0b1h + pshuflw xmm3, xmm3, 0b1h + pshufhw xmm1, xmm1, 0b1h + pshufhw xmm3, xmm3, 0b1h + + punpcknbl + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [rdi+16*0], xmm0 + movdqa 
[rdi+16*1], xmm1 + movdqa [rdi+16*2], xmm4 + movdqa [rdi+16*3], xmm3 + + ; col 1, 3 + + movdqa xmm0, [rsi] + movdqa xmm2, [rsi+r8] + lea rsi, [rsi+r8*2] ; keep the full 64-bit source pointer: an esi destination would zero the upper half of rsi before the next loads + + movdqa xmm1, [rsi] + movdqa xmm3, [rsi+r8] + lea rsi, [rsi+r8*2] + + pshuflw xmm0, xmm0, 0b1h + pshuflw xmm2, xmm2, 0b1h + pshufhw xmm0, xmm0, 0b1h + pshufhw xmm2, xmm2, 0b1h + + punpcknbl + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [rdi+16*4], xmm0 + movdqa [rdi+16*5], xmm1 + movdqa [rdi+16*6], xmm4 + movdqa [rdi+16*7], xmm3 + + add rdi, 128 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock4_amd64 endp + +; +; swizzling with unaligned reads +; + +; +; SwizzleBlock32u_amd64: rcx = dst, rdx = src, r8 = byte step between source rows, r9d = write mask +; Source rows are loaded with movdqu, dst is accessed with movdqa; a mask other than 0ffffffffh takes the @WM path, which merges the masked source bits into the existing dst contents + +SwizzleBlock32u_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov rcx, 4 + + cmp r9d, 0ffffffffh + jnz SwizzleBlock32u_amd64@WM + + align 16 +@@: + movdqu xmm0, [rsi] + movdqu xmm4, [rsi+16] + movdqu xmm1, [rsi+r8] + movdqu xmm5, [rsi+r8+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm2 + movdqa [rdi+16*2], xmm4 + movdqa [rdi+16*3], xmm6 + + lea rsi, [rsi+r8*2] + add rdi, 64 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock32u_amd64@WM: + + movd xmm7, r9d + pshufd xmm7, xmm7, 0 ; replicate the 32-bit mask into all four dwords + + align 16 +@@: + movdqu xmm0, [rsi] + movdqu xmm4, [rsi+16] + movdqu xmm1, [rsi+r8] + movdqu xmm5, [rsi+r8+16] + + punpck qdq, 0, 4, 1, 5, 2, 6 + + movdqa xmm3, xmm7 + pshufd xmm5, xmm7, 0e4h + movdqa xmm9, xmm7 + pshufd xmm11, xmm7, 0e4h + + pandn xmm3, [rdi+16*0] + pand xmm0, xmm7 + por xmm0, xmm3 + movdqa [rdi+16*0], xmm0 + + pandn xmm5, [rdi+16*1] + pand xmm2, xmm7 + por xmm2, xmm5 + movdqa [rdi+16*1], xmm2 + + pandn xmm9, [rdi+16*2] + pand xmm4, xmm7 + por xmm4, xmm9 + movdqa [rdi+16*2], xmm4 + + pandn xmm11, [rdi+16*3] + pand xmm6, xmm7 + por xmm6, xmm11 + movdqa [rdi+16*3], xmm6 ; store through the full 64-bit rdi like the three stores above; an edi-based address drops the upper 32 pointer bits + + lea rsi, [rsi+r8*2] + add rdi, 64 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock32u_amd64 
endp + +; +; SwizzleBlock16u_amd64 +; + +SwizzleBlock16u_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov rcx, 4 + + align 16 +@@: + movdqu xmm0, [rsi] + movdqu xmm1, [rsi+16] + movdqu xmm2, [rsi+r8] + movdqu xmm3, [rsi+r8+16] + + punpck wd, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 5 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm1 + movdqa [rdi+16*2], xmm4 + movdqa [rdi+16*3], xmm5 + + lea rsi, [rsi+r8*2] + add rdi, 64 + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock16u_amd64 endp + +; +; SwizzleBlock8u: rcx = dst, rdx = src, r8 = byte step between source rows +; Two iterations, each reading eight 16-byte source rows with movdqu and writing 128 bytes to dst with movdqa. NOTE(review): xmm6 is overwritten without being saved, although the Microsoft x64 convention lists xmm6-xmm15 as callee-saved - confirm callers tolerate this + +SwizzleBlock8u_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov ecx, 2 ; the 32-bit write zero-extends, so rcx = 2 for the dec rcx below + + align 16 +@@: + ; col 0, 2 + + movdqu xmm0, [rsi] + movdqu xmm2, [rsi+r8] + lea rsi, [rsi+r8*2] + + movdqu xmm1, [rsi] + movdqu xmm3, [rsi+r8] + pshufd xmm1, xmm1, 0b1h + pshufd xmm3, xmm3, 0b1h + lea rsi, [rsi+r8*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm4 + movdqa [rdi+16*2], xmm1 + movdqa [rdi+16*3], xmm5 + + ; col 1, 3 + + movdqu xmm0, [rsi] + movdqu xmm2, [rsi+r8] + pshufd xmm0, xmm0, 0b1h + pshufd xmm2, xmm2, 0b1h + lea rsi, [rsi+r8*2] + + movdqu xmm1, [rsi] + movdqu xmm3, [rsi+r8] + lea rsi, [rsi+r8*2] + + punpck bw, 0, 2, 1, 3, 4, 6 + punpck wd, 0, 2, 4, 6, 1, 3 + punpck qdq, 0, 1, 2, 3, 4, 5 + + movdqa [rdi+16*4], xmm0 + movdqa [rdi+16*5], xmm4 + movdqa [rdi+16*6], xmm1 + movdqa [rdi+16*7], xmm5 + + add rdi, 128 ; 64-bit add: writing edi would zero the upper half of rdi and break the second iteration for dst at or above 4 GiB + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock8u_amd64 endp + +; +; SwizzleBlock4u +; + +SwizzleBlock4u_amd64 proc public + + push rsi + push rdi + + mov rdi, rcx + mov rsi, rdx + mov rcx, 2 + + mov eax, 0f0f0f0fh + movd xmm7, eax + pshufd xmm7, xmm7, 0 + + align 16 +@@: + ; col 0, 2 + + movdqu xmm0, [rsi] + movdqu xmm2, [rsi+r8] + lea rsi, [rsi+r8*2] + + movdqu xmm1, [rsi] + movdqu xmm3, [rsi+r8] + lea rsi, [rsi+r8*2] + + pshuflw xmm1, xmm1, 0b1h + pshuflw 
xmm3, xmm3, 0b1h + pshufhw xmm1, xmm1, 0b1h + pshufhw xmm3, xmm3, 0b1h + + punpcknbl + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [rdi+16*0], xmm0 + movdqa [rdi+16*1], xmm1 + movdqa [rdi+16*2], xmm4 + movdqa [rdi+16*3], xmm3 + + ; col 1, 3 + + movdqu xmm0, [rsi] + movdqu xmm2, [rsi+r8] + lea rsi, [rsi+r8*2] ; keep the full 64-bit source pointer: an esi destination would zero the upper half of rsi before the next loads + + movdqu xmm1, [rsi] + movdqu xmm3, [rsi+r8] + lea rsi, [rsi+r8*2] + + pshuflw xmm0, xmm0, 0b1h + pshuflw xmm2, xmm2, 0b1h + pshufhw xmm0, xmm0, 0b1h + pshufhw xmm2, xmm2, 0b1h + + punpcknbl + punpck bw, 0, 2, 4, 6, 1, 3 + punpck bw, 0, 2, 1, 3, 4, 6 + punpck qdq, 0, 4, 2, 6, 1, 3 + + movdqa [rdi+16*4], xmm0 + movdqa [rdi+16*5], xmm1 + movdqa [rdi+16*6], xmm4 + movdqa [rdi+16*7], xmm3 + + add rdi, 128 ; advance dst by the 128 bytes written in this iteration + + dec rcx + jnz @B + + pop rdi + pop rsi + + ret + +SwizzleBlock4u_amd64 endp + + end + \ No newline at end of file diff --git a/gsdx/x86.cpp b/gsdx/x86.cpp new file mode 100644 index 0000000..677250a --- /dev/null +++ b/gsdx/x86.cpp @@ -0,0 +1,836 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTables.h" +#include "x86.h" + +// unswizzling + +void __fastcall unSwizzleBlock32_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 8 rows of 8 DWORDs; dst advances dstpitch bytes per row */ +{ + const DWORD* s = &columnTable32[0][0]; /* walked as a flat list, 8 entries per output row */ + + for(int j = 0; j < 8; j++, s += 8, dst += dstpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)dst)[i] = ((DWORD*)src)[s[i]]; /* table entry = DWORD index into src */ +} + +void __fastcall unSwizzleBlock16_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 8 rows of 16 WORDs; source WORD indices are read from columnTable16 */ +{ + const DWORD* s = &columnTable16[0][0]; + + for(int j = 0; j < 8; j++, s += 16, dst += dstpitch) + for(int i = 0; i < 16; i++) + ((WORD*)dst)[i] = ((WORD*)src)[s[i]]; +} + +void __fastcall unSwizzleBlock8_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 16 rows of 16 bytes; source byte indices are read from columnTable8 */ +{ + const DWORD* s = &columnTable8[0][0]; + + for(int j = 0; j < 16; j++, s += 16, dst += dstpitch) + for(int i = 0; i < 16; i++) + dst[i] = src[s[i]]; +} + +void __fastcall unSwizzleBlock4_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 16 rows of 32 nibbles, packed two per dst byte */ +{ + const DWORD* s = &columnTable4[0][0]; + + for(int j = 0; j < 16; j++, s += 32, dst += dstpitch) + { + for(int i = 0; i < 32; i++) + { + DWORD addr = s[i]; /* source nibble index: byte addr>>1, low nibble when addr is even */ + BYTE c = (src[addr>>1] >> ((addr&1) << 2)) & 0x0f; + int shift = (i&1) << 2; /* 0 for even i (low nibble), 4 for odd i (high nibble) */ + dst[i >> 1] = (dst[i >> 1] & (0xf0 >> shift)) | (c << shift); /* replace only the target nibble, keep the other one */ + } + } +} + +void __fastcall unSwizzleBlock8HP_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 8 rows of 8 bytes; each byte is bits 24-31 of the src DWORD selected through columnTable32 */ +{ + const DWORD* s = &columnTable32[0][0]; + + for(int j = 0; j < 8; j++, s += 8, dst += dstpitch) + for(int i = 0; i < 8; i++) + dst[i] = (BYTE)(((DWORD*)src)[s[i]]>>24); +} + +void __fastcall unSwizzleBlock4HLP_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 8 rows of 8 bytes; each byte is bits 24-27 of the src DWORD selected through columnTable32 */ +{ + const DWORD* s = &columnTable32[0][0]; + + for(int j = 0; j < 8; j++, s += 8, dst += dstpitch) + for(int i = 0; i < 8; i++) + dst[i] = (BYTE)(((DWORD*)src)[s[i]]>>24)&0xf; +} + +void __fastcall unSwizzleBlock4HHP_c(BYTE* src, BYTE* dst, int dstpitch) /* writes 8 rows of 8 bytes; each byte is bits 28-31 of the src DWORD selected through columnTable32 */ +{ + const DWORD* s = &columnTable32[0][0]; + + for(int j = 0; j < 8; j++, s += 8, dst += dstpitch) + for(int i = 0; i < 8; i++) + dst[i] = (BYTE)(((DWORD*)src)[s[i]]>>28); 
+} + +void __fastcall unSwizzleBlock4P_c(BYTE* src, BYTE* dst, int dstpitch) +{ + const DWORD* s = &columnTable4[0][0]; + + for(int j = 0; j < 16; j++, s += 32, dst += dstpitch) + { + for(int i = 0; i < 32; i++) + { + DWORD addr = s[i]; + dst[i] = (src[addr>>1] >> ((addr&1) << 2)) & 0x0f; + } + } +} + +// swizzling + +void __fastcall SwizzleBlock32_c(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask) +{ + const DWORD* d = &columnTable32[0][0]; + + if(WriteMask == 0xffffffff) + { + for(int j = 0; j < 8; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)dst)[d[i]] = ((DWORD*)src)[i]; + } + else + { + for(int j = 0; j < 8; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)dst)[d[i]] = (((DWORD*)dst)[d[i]] & ~WriteMask) | (((DWORD*)src)[i] & WriteMask); + } +} + +void __fastcall SwizzleBlock16_c(BYTE* dst, BYTE* src, int srcpitch) +{ + const DWORD* d = &columnTable16[0][0]; + + for(int j = 0; j < 8; j++, d += 16, src += srcpitch) + for(int i = 0; i < 16; i++) + ((WORD*)dst)[d[i]] = ((WORD*)src)[i]; +} + +void __fastcall SwizzleBlock8_c(BYTE* dst, BYTE* src, int srcpitch) +{ + const DWORD* d = &columnTable8[0][0]; + + for(int j = 0; j < 16; j++, d += 16, src += srcpitch) + for(int i = 0; i < 16; i++) + dst[d[i]] = src[i]; +} + +void __fastcall SwizzleBlock4_c(BYTE* dst, BYTE* src, int srcpitch) +{ + const DWORD* d = &columnTable4[0][0]; + + for(int j = 0; j < 16; j++, d += 32, src += srcpitch) + { + for(int i = 0; i < 32; i++) + { + DWORD addr = d[i]; + BYTE c = (src[i>>1] >> ((i&1) << 2)) & 0x0f; + DWORD shift = (addr&1) << 2; + dst[addr >> 1] = (dst[addr >> 1] & (0xf0 >> shift)) | (c << shift); + } + } +} + +// column swizzling (TODO: sse2) + +void __fastcall SwizzleColumn32_c(int y, BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask) +{ + const DWORD* d = &columnTable32[((y/2)&3)*2][0]; + + if(WriteMask == 0xffffffff) + { + for(int j = 0; j < 2; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)dst)[d[i]] 
= ((DWORD*)src)[i]; + } + else + { + for(int j = 0; j < 2; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((DWORD*)dst)[d[i]] = (((DWORD*)dst)[d[i]] & ~WriteMask) | (((DWORD*)src)[i] & WriteMask); + } +} + +void __fastcall SwizzleColumn16_c(int y, BYTE* dst, BYTE* src, int srcpitch) +{ + const DWORD* d = &columnTable16[((y/2)&3)*2][0]; + + for(int j = 0; j < 2; j++, d += 16, src += srcpitch) + for(int i = 0; i < 16; i++) + ((WORD*)dst)[d[i]] = ((WORD*)src)[i]; +} + +void __fastcall SwizzleColumn8_c(int y, BYTE* dst, BYTE* src, int srcpitch) +{ + const DWORD* d = &columnTable8[((y/4)&3)*4][0]; + + for(int j = 0; j < 4; j++, d += 16, src += srcpitch) + for(int i = 0; i < 16; i++) + dst[d[i]] = src[i]; +} + +void __fastcall SwizzleColumn4_c(int y, BYTE* dst, BYTE* src, int srcpitch) +{ + const DWORD* d = &columnTable4[y&(3<<2)][0]; // ((y/4)&3)*4 + + for(int j = 0; j < 4; j++, d += 32, src += srcpitch) + { + for(int i = 0; i < 32; i++) + { + DWORD addr = d[i]; + BYTE c = (src[i>>1] >> ((i&1) << 2)) & 0x0f; + DWORD shift = (addr&1) << 2; + dst[addr >> 1] = (dst[addr >> 1] & (0xf0 >> shift)) | (c << shift); + } + } +} + +// + +#if defined(_M_AMD64) || _M_IX86_FP >= 2 + +static __m128i s_zero = _mm_setzero_si128(); +static __m128i s_bgrm = _mm_set1_epi32(0x00ffffff); +static __m128i s_am = _mm_set1_epi32(0x00008000); +static __m128i s_bm = _mm_set1_epi32(0x00007c00); +static __m128i s_gm = _mm_set1_epi32(0x000003e0); +static __m128i s_rm = _mm_set1_epi32(0x0000001f); + +void __fastcall ExpandBlock24_sse2(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA) +{ + __m128i TA0 = _mm_set1_epi32((DWORD)pTEXA->TA0 << 24); + + if(!pTEXA->AEM) + { + for(int j = 0; j < 8; j++, src += 8, dst += dstpitch>>2) + { + for(int i = 0; i < 8; i += 4) + { + __m128i c = _mm_load_si128((__m128i*)&src[i]); + c = _mm_and_si128(c, s_bgrm); + c = _mm_or_si128(c, TA0); + _mm_store_si128((__m128i*)&dst[i], c); + } + } + } + else + { + for(int j = 0; j < 8; j++, src += 8, dst += 
dstpitch>>2) + { + for(int i = 0; i < 8; i += 4) + { + __m128i c = _mm_load_si128((__m128i*)&src[i]); + c = _mm_and_si128(c, s_bgrm); + __m128i a = _mm_andnot_si128(_mm_cmpeq_epi32(c, s_zero), TA0); + c = _mm_or_si128(c, a); + _mm_store_si128((__m128i*)&dst[i], c); + } + } + } +} + +void __fastcall ExpandBlock16_sse2(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA) +{ + __m128i TA0 = _mm_set1_epi32((DWORD)pTEXA->TA0 << 24); + __m128i TA1 = _mm_set1_epi32((DWORD)pTEXA->TA1 << 24); + __m128i a, b, g, r; + + if(!pTEXA->AEM) + { + for(int j = 0; j < 8; j++, src += 16, dst += dstpitch>>2) + { + for(int i = 0; i < 16; i += 8) + { + __m128i c = _mm_load_si128((__m128i*)&src[i]); + + __m128i cl = _mm_unpacklo_epi16(c, s_zero); + __m128i ch = _mm_unpackhi_epi16(c, s_zero); + + __m128i alm = _mm_cmplt_epi32(cl, s_am); + __m128i ahm = _mm_cmplt_epi32(ch, s_am); + + // lo + + b = _mm_slli_epi32(_mm_and_si128(cl, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(cl, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(cl, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(alm, TA0), _mm_andnot_si128(alm, TA1)); + + cl = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i], cl); + + // hi + + b = _mm_slli_epi32(_mm_and_si128(ch, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(ch, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(ch, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(ahm, TA0), _mm_andnot_si128(ahm, TA1)); + + ch = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i+4], ch); + } + } + } + else + { + for(int j = 0; j < 8; j++, src += 16, dst += dstpitch>>2) + { + for(int i = 0; i < 16; i += 8) + { + __m128i c = _mm_load_si128((__m128i*)&src[i]); + + __m128i cl = _mm_unpacklo_epi16(c, s_zero); + __m128i ch = _mm_unpackhi_epi16(c, s_zero); + + __m128i alm = _mm_cmplt_epi32(cl, s_am); + __m128i ahm = _mm_cmplt_epi32(ch, s_am); + + __m128i trm = _mm_cmpeq_epi16(c, s_zero); + __m128i trlm = _mm_unpacklo_epi16(trm, 
trm); + __m128i trhm = _mm_unpackhi_epi16(trm, trm); + + // lo + + b = _mm_slli_epi32(_mm_and_si128(cl, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(cl, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(cl, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(alm, TA0), _mm_andnot_si128(alm, TA1)); + a = _mm_andnot_si128(trlm, a); + + cl = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i], cl); + + // hi + + b = _mm_slli_epi32(_mm_and_si128(ch, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(ch, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(ch, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(ahm, TA0), _mm_andnot_si128(ahm, TA1)); + a = _mm_andnot_si128(trhm, a); + + ch = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i+4], ch); + } + } + } +} + +void __fastcall Expand16_sse2(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA) +{ + ASSERT(!(w&7)); + + __m128i TA0 = _mm_set1_epi32((DWORD)pTEXA->TA0 << 24); + __m128i TA1 = _mm_set1_epi32((DWORD)pTEXA->TA1 << 24); + __m128i a, b, g, r; + + if(!pTEXA->AEM) + { + for(int i = 0; i < w; i += 8) + { + __m128i c = _mm_load_si128((__m128i*)&src[i]); + + __m128i cl = _mm_unpacklo_epi16(c, s_zero); + __m128i ch = _mm_unpackhi_epi16(c, s_zero); + + __m128i alm = _mm_cmplt_epi32(cl, s_am); + __m128i ahm = _mm_cmplt_epi32(ch, s_am); + + // lo + + b = _mm_slli_epi32(_mm_and_si128(cl, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(cl, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(cl, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(alm, TA0), _mm_andnot_si128(alm, TA1)); + + cl = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i], cl); + + // hi + + b = _mm_slli_epi32(_mm_and_si128(ch, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(ch, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(ch, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(ahm, TA0), _mm_andnot_si128(ahm, TA1)); + + ch = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + 
_mm_store_si128((__m128i*)&dst[i+4], ch); + } + } + else + { + for(int i = 0; i < w; i += 8) + { + __m128i c = _mm_load_si128((__m128i*)&src[i]); + + __m128i cl = _mm_unpacklo_epi16(c, s_zero); + __m128i ch = _mm_unpackhi_epi16(c, s_zero); + + __m128i alm = _mm_cmplt_epi32(cl, s_am); + __m128i ahm = _mm_cmplt_epi32(ch, s_am); + + __m128i trm = _mm_cmpeq_epi16(c, s_zero); + __m128i trlm = _mm_unpacklo_epi16(trm, trm); + __m128i trhm = _mm_unpackhi_epi16(trm, trm); + + // lo + + b = _mm_slli_epi32(_mm_and_si128(cl, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(cl, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(cl, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(alm, TA0), _mm_andnot_si128(alm, TA1)); + a = _mm_andnot_si128(trlm, a); + + cl = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i], cl); + + // hi + + b = _mm_slli_epi32(_mm_and_si128(ch, s_bm), 9); + g = _mm_slli_epi32(_mm_and_si128(ch, s_gm), 6); + r = _mm_slli_epi32(_mm_and_si128(ch, s_rm), 3); + a = _mm_or_si128(_mm_and_si128(ahm, TA0), _mm_andnot_si128(ahm, TA1)); + a = _mm_andnot_si128(trhm, a); + + ch = _mm_or_si128(_mm_or_si128(a, b), _mm_or_si128(g, r)); + + _mm_store_si128((__m128i*)&dst[i+4], ch); + } + } +} + +#endif + +void __fastcall ExpandBlock24_c(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA) +{ + DWORD TA0 = (DWORD)pTEXA->TA0 << 24; + + if(!pTEXA->AEM) + { + for(int j = 0; j < 8; j++, src += 8, dst += dstpitch>>2) + for(int i = 0; i < 8; i++) + dst[i] = TA0 | (src[i]&0xffffff); + } + else + { + for(int j = 0; j < 8; j++, src += 8, dst += dstpitch>>2) + for(int i = 0; i < 8; i++) + dst[i] = ((src[i]&0xffffff) ? 
TA0 : 0) | (src[i]&0xffffff); + } +} + +void __fastcall ExpandBlock16_c(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA) +{ + DWORD TA0 = (DWORD)pTEXA->TA0 << 24; + DWORD TA1 = (DWORD)pTEXA->TA1 << 24; + + if(!pTEXA->AEM) + { + for(int j = 0; j < 8; j++, src += 16, dst += dstpitch>>2) + for(int i = 0; i < 16; i++) + dst[i] = ((src[i]&0x8000) ? TA1 : TA0) + | ((src[i]&0x7c00) << 9) | ((src[i]&0x03e0) << 6) | ((src[i]&0x001f) << 3); + } + else + { + for(int j = 0; j < 8; j++, src += 16, dst += dstpitch>>2) + for(int i = 0; i < 16; i++) + dst[i] = ((src[i]&0x8000) ? TA1 : src[i] ? TA0 : 0) + | ((src[i]&0x7c00) << 9) | ((src[i]&0x03e0) << 6) | ((src[i]&0x001f) << 3); + } +} + +void __fastcall Expand16_c(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA) +{ + DWORD TA0 = (DWORD)pTEXA->TA0 << 24; + DWORD TA1 = (DWORD)pTEXA->TA1 << 24; + + if(!pTEXA->AEM) + { + for(int i = 0; i < w; i++) + dst[i] = ((src[i]&0x8000) ? TA1 : TA0) + | ((src[i]&0x7c00) << 9) | ((src[i]&0x03e0) << 6) | ((src[i]&0x001f) << 3); + } + else + { + for(int i = 0; i < w; i++) + dst[i] = ((src[i]&0x8000) ? TA1 : src[i] ? 
TA0 : 0) + | ((src[i]&0x7c00) << 9) | ((src[i]&0x03e0) << 6) | ((src[i]&0x001f) << 3); + } +} + +// + +#if defined(_M_AMD64) || _M_IX86_FP >= 2 + +static __m128 s_uvmin = _mm_set1_ps(+1e10); +static __m128 s_uvmax = _mm_set1_ps(-1e10); + +void __fastcall UVMinMax_sse2(int nVertices, vertex_t* pVertices, uvmm_t* uv) +{ + __m128 uvmin = s_uvmin; + __m128 uvmax = s_uvmax; + + __m128* p = (__m128*)pVertices + 1; + + int i = 0; + + nVertices -= 5; + + for(; i < nVertices; i += 6) // 6 regs for loading, 2 regs for min/max + { + uvmin = _mm_min_ps(uvmin, p[(i+0)*2]); + uvmax = _mm_max_ps(uvmax, p[(i+0)*2]); + uvmin = _mm_min_ps(uvmin, p[(i+1)*2]); + uvmax = _mm_max_ps(uvmax, p[(i+1)*2]); + uvmin = _mm_min_ps(uvmin, p[(i+2)*2]); + uvmax = _mm_max_ps(uvmax, p[(i+2)*2]); + uvmin = _mm_min_ps(uvmin, p[(i+3)*2]); + uvmax = _mm_max_ps(uvmax, p[(i+3)*2]); + uvmin = _mm_min_ps(uvmin, p[(i+4)*2]); + uvmax = _mm_max_ps(uvmax, p[(i+4)*2]); + uvmin = _mm_min_ps(uvmin, p[(i+5)*2]); + uvmax = _mm_max_ps(uvmax, p[(i+5)*2]); + } + + nVertices += 5; + + for(; i < nVertices; i++) + { + uvmin = _mm_min_ps(uvmin, p[i*2]); + uvmax = _mm_max_ps(uvmax, p[i*2]); + } + + _mm_storeh_pi((__m64*)uv, uvmin); + _mm_storeh_pi((__m64*)uv + 1, uvmax); +} + +#endif + +void __fastcall UVMinMax_c(int nVertices, vertex_t* pVertices, uvmm_t* uv) +{ + uv->umin = uv->vmin = +1e10; + uv->umax = uv->vmax = -1e10; + + for(; nVertices-- > 0; pVertices++) + { + float u = pVertices->u; + if(uv->umax < u) uv->umax = u; + if(uv->umin > u) uv->umin = u; + float v = pVertices->v; + if(uv->vmax < v) uv->vmax = v; + if(uv->vmin > v) uv->vmin = v; + } +} + +#if defined(_M_AMD64) || _M_IX86_FP >= 2 + +static __m128i s_clut[64]; + +void __fastcall WriteCLUT_T16_I8_CSM1_sse2(WORD* vm, WORD* clut) +{ + __m128i* src = (__m128i*)vm; + __m128i* dst = (__m128i*)clut; + + for(int i = 0; i < 32; i += 4) + { + __m128i r0 = _mm_load_si128(&src[i+0]); + __m128i r1 = _mm_load_si128(&src[i+1]); + __m128i r2 = _mm_load_si128(&src[i+2]); + 
__m128i r3 = _mm_load_si128(&src[i+3]); + + __m128i r4 = _mm_unpacklo_epi16(r0, r1); + __m128i r5 = _mm_unpackhi_epi16(r0, r1); + __m128i r6 = _mm_unpacklo_epi16(r2, r3); + __m128i r7 = _mm_unpackhi_epi16(r2, r3); + + r0 = _mm_unpacklo_epi32(r4, r6); + r1 = _mm_unpackhi_epi32(r4, r6); + r2 = _mm_unpacklo_epi32(r5, r7); + r3 = _mm_unpackhi_epi32(r5, r7); + + r4 = _mm_unpacklo_epi16(r0, r1); + r5 = _mm_unpackhi_epi16(r0, r1); + r6 = _mm_unpacklo_epi16(r2, r3); + r7 = _mm_unpackhi_epi16(r2, r3); + + _mm_store_si128(&dst[i+0], r4); + _mm_store_si128(&dst[i+1], r6); + _mm_store_si128(&dst[i+2], r5); + _mm_store_si128(&dst[i+3], r7); + } +} + +void __fastcall WriteCLUT_T32_I8_CSM1_sse2(DWORD* vm, WORD* clut) +{ + __m128i* src = (__m128i*)vm; + __m128i* dst = s_clut; + + for(int j = 0; j < 64; j += 32, src += 32, dst += 32) + { + for(int i = 0; i < 16; i += 4) + { + __m128i r0 = _mm_load_si128(&src[i+0]); + __m128i r1 = _mm_load_si128(&src[i+1]); + __m128i r2 = _mm_load_si128(&src[i+2]); + __m128i r3 = _mm_load_si128(&src[i+3]); + + _mm_store_si128(&dst[i*2+0], _mm_unpacklo_epi64(r0, r1)); + _mm_store_si128(&dst[i*2+1], _mm_unpacklo_epi64(r2, r3)); + _mm_store_si128(&dst[i*2+2], _mm_unpackhi_epi64(r0, r1)); + _mm_store_si128(&dst[i*2+3], _mm_unpackhi_epi64(r2, r3)); + + __m128i r4 = _mm_load_si128(&src[i+0+16]); + __m128i r5 = _mm_load_si128(&src[i+1+16]); + __m128i r6 = _mm_load_si128(&src[i+2+16]); + __m128i r7 = _mm_load_si128(&src[i+3+16]); + + _mm_store_si128(&dst[i*2+4], _mm_unpacklo_epi64(r4, r5)); + _mm_store_si128(&dst[i*2+5], _mm_unpacklo_epi64(r6, r7)); + _mm_store_si128(&dst[i*2+6], _mm_unpackhi_epi64(r4, r5)); + _mm_store_si128(&dst[i*2+7], _mm_unpackhi_epi64(r6, r7)); + } + } + + for(int i = 0; i < 32; i++) + { + __m128i r0 = s_clut[i*2]; + __m128i r1 = s_clut[i*2+1]; + __m128i r2 = _mm_unpacklo_epi16(r0, r1); + __m128i r3 = _mm_unpackhi_epi16(r0, r1); + r0 = _mm_unpacklo_epi16(r2, r3); + r1 = _mm_unpackhi_epi16(r2, r3); + r2 = _mm_unpacklo_epi16(r0, r1); + 
r3 = _mm_unpackhi_epi16(r0, r1); + _mm_store_si128(&((__m128i*)clut)[i], r2); + _mm_store_si128(&((__m128i*)clut)[i+32], r3); + } +} + +void __fastcall WriteCLUT_T16_I4_CSM1_sse2(WORD* vm, WORD* clut) +{ + // TODO (probably not worth, _c is going to be just as fast) + WriteCLUT_T16_I4_CSM1_c(vm, clut); +} + +void __fastcall WriteCLUT_T32_I4_CSM1_sse2(DWORD* vm, WORD* clut) +{ + __m128i* src = (__m128i*)vm; + __m128i* dst = s_clut; + + __m128i r0 = _mm_load_si128(&src[0]); + __m128i r1 = _mm_load_si128(&src[1]); + __m128i r2 = _mm_load_si128(&src[2]); + __m128i r3 = _mm_load_si128(&src[3]); + + _mm_store_si128(&dst[0], _mm_unpacklo_epi64(r0, r1)); + _mm_store_si128(&dst[1], _mm_unpacklo_epi64(r2, r3)); + _mm_store_si128(&dst[2], _mm_unpackhi_epi64(r0, r1)); + _mm_store_si128(&dst[3], _mm_unpackhi_epi64(r2, r3)); + + for(int i = 0; i < 2; i++) + { + __m128i r0 = s_clut[i*2]; + __m128i r1 = s_clut[i*2+1]; + __m128i r2 = _mm_unpacklo_epi16(r0, r1); + __m128i r3 = _mm_unpackhi_epi16(r0, r1); + r0 = _mm_unpacklo_epi16(r2, r3); + r1 = _mm_unpackhi_epi16(r2, r3); + r2 = _mm_unpacklo_epi16(r0, r1); + r3 = _mm_unpackhi_epi16(r0, r1); + _mm_store_si128(&((__m128i*)clut)[i], r2); + _mm_store_si128(&((__m128i*)clut)[i+32], r3); + } +} + +#endif + +void __fastcall WriteCLUT_T16_I8_CSM1_c(WORD* vm, WORD* clut) +{ + const static DWORD map[] = + { + 0, 2, 8, 10, 16, 18, 24, 26, + 4, 6, 12, 14, 20, 22, 28, 30, + 1, 3, 9, 11, 17, 19, 25, 27, + 5, 7, 13, 15, 21, 23, 29, 31 + }; + + for(int j = 0; j < 8; j++, vm += 32, clut += 32) + { + for(int i = 0; i < 32; i++) + { + clut[i] = vm[map[i]]; + } + } +} + +void __fastcall WriteCLUT_T32_I8_CSM1_c(DWORD* vm, WORD* clut) +{ + const static DWORD map[] = + { + 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, + 64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79, + 16, 17, 20, 21, 24, 25, 28, 29, 18, 19, 22, 23, 26, 27, 30, 31, + 80, 81, 84, 85, 88, 89, 92, 93, 82, 83, 86, 87, 90, 91, 94, 95, + 32, 33, 36, 37, 40, 41, 44, 45, 
34, 35, 38, 39, 42, 43, 46, 47, + 96, 97, 100, 101, 104, 105, 108, 109, 98, 99, 102, 103, 106, 107, 110, 111, + 48, 49, 52, 53, 56, 57, 60, 61, 50, 51, 54, 55, 58, 59, 62, 63, + 112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127 + }; + + for(int j = 0; j < 2; j++, vm += 128, clut += 128) + { + for(int i = 0; i < 128; i++) + { + DWORD dw = vm[map[i]]; + clut[i] = (WORD)(dw & 0xffff); + clut[i+256] = (WORD)(dw >> 16); + } + } +} + +void __fastcall WriteCLUT_T16_I4_CSM1_c(WORD* vm, WORD* clut) +{ + const static DWORD map[] = + { + 0, 2, 8, 10, 16, 18, 24, 26, + 4, 6, 12, 14, 20, 22, 28, 30 + }; + + for(int i = 0; i < 16; i++) + { + clut[i] = vm[map[i]]; + } +} + +void __fastcall WriteCLUT_T32_I4_CSM1_c(DWORD* vm, WORD* clut) +{ + const static DWORD map[] = + { + 0, 1, 4, 5, 8, 9, 12, 13, + 2, 3, 6, 7, 10, 11, 14, 15 + }; + + for(int i = 0; i < 16; i++) + { + DWORD dw = vm[map[i]]; + clut[i] = (WORD)(dw & 0xffff); + clut[i+256] = (WORD)(dw >> 16); + } +} + +// + +#if defined(_M_AMD64) || _M_IX86_FP >= 2 + +extern "C" void __fastcall ReadCLUT32_T32_I8_sse2(WORD* src, DWORD* dst) +{ + for(int i = 0; i < 256; i += 16) + { + ReadCLUT32_T32_I4_sse2(&src[i], &dst[i]); // going to be inlined nicely + } +} + +extern "C" void __fastcall ReadCLUT32_T32_I4_sse2(WORD* src, DWORD* dst) +{ + __m128i r0 = ((__m128i*)src)[0]; + __m128i r1 = ((__m128i*)src)[1]; + __m128i r2 = ((__m128i*)src)[0+32]; + __m128i r3 = ((__m128i*)src)[1+32]; + _mm_store_si128(&((__m128i*)dst)[0], _mm_unpacklo_epi16(r0, r2)); + _mm_store_si128(&((__m128i*)dst)[1], _mm_unpackhi_epi16(r0, r2)); + _mm_store_si128(&((__m128i*)dst)[2], _mm_unpacklo_epi16(r1, r3)); + _mm_store_si128(&((__m128i*)dst)[3], _mm_unpackhi_epi16(r1, r3)); +} + +extern "C" void __fastcall ReadCLUT32_T16_I8_sse2(WORD* src, DWORD* dst) +{ + for(int i = 0; i < 256; i += 16) + { + ReadCLUT32_T16_I4_sse2(&src[i], &dst[i]); + } +} + +extern "C" void __fastcall ReadCLUT32_T16_I4_sse2(WORD* src, DWORD* dst) +{ + __m128i 
r0 = ((__m128i*)src)[0]; + __m128i r1 = ((__m128i*)src)[1]; + _mm_store_si128(&((__m128i*)dst)[0], _mm_unpacklo_epi16(r0, s_zero)); + _mm_store_si128(&((__m128i*)dst)[1], _mm_unpackhi_epi16(r0, s_zero)); + _mm_store_si128(&((__m128i*)dst)[2], _mm_unpacklo_epi16(r1, s_zero)); + _mm_store_si128(&((__m128i*)dst)[3], _mm_unpackhi_epi16(r1, s_zero)); +} + +#endif + +void __fastcall ReadCLUT32_T32_I8_c(WORD* src, DWORD* dst) +{ + for(int i = 0; i < 256; i++) + { + dst[i] = ((DWORD)src[i+256] << 16) | src[i]; + } +} + +void __fastcall ReadCLUT32_T32_I4_c(WORD* src, DWORD* dst) +{ + for(int i = 0; i < 16; i++) + { + dst[i] = ((DWORD)src[i+256] << 16) | src[i]; + } +} + +void __fastcall ReadCLUT32_T16_I8_c(WORD* src, DWORD* dst) +{ + for(int i = 0; i < 256; i++) + { + dst[i] = (DWORD)src[i]; + } +} + +void __fastcall ReadCLUT32_T16_I4_c(WORD* src, DWORD* dst) +{ + for(int i = 0; i < 16; i++) + { + dst[i] = (DWORD)src[i]; + } +} + +// \ No newline at end of file diff --git a/gsdx/x86.h b/gsdx/x86.h new file mode 100644 index 0000000..9c24ea6 --- /dev/null +++ b/gsdx/x86.h @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GS.h" + +extern "C" void __fastcall memsetd(void* dst, unsigned int c, size_t len); + +extern "C" void unSwizzleBlock32_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock16_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock8_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock4_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock8HP_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock4HLP_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock4HHP_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void unSwizzleBlock4P_amd64(BYTE* src, BYTE* dst, __int64 dstpitch); +extern "C" void SwizzleBlock32_amd64(BYTE* dst, BYTE* src, __int64 srcpitch, DWORD WriteMask = 0xffffffff); +extern "C" void SwizzleBlock16_amd64(BYTE* dst, BYTE* src, __int64 srcpitch); +extern "C" void SwizzleBlock8_amd64(BYTE* dst, BYTE* src, __int64 srcpitch); +extern "C" void SwizzleBlock4_amd64(BYTE* dst, BYTE* src, __int64 srcpitch); +extern "C" void SwizzleBlock32u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch, DWORD WriteMask = 0xffffffff); +extern "C" void SwizzleBlock16u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch); +extern "C" void SwizzleBlock8u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch); +extern "C" void SwizzleBlock4u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch); +extern "C" void __fastcall unSwizzleBlock32_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock16_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock8_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock4_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock8HP_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock4HLP_sse2(BYTE* 
src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock4HHP_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall unSwizzleBlock4P_sse2(BYTE* src, BYTE* dst, int dstpitch); +extern "C" void __fastcall SwizzleBlock32_sse2(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock16_sse2(BYTE* dst, BYTE* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock8_sse2(BYTE* dst, BYTE* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock4_sse2(BYTE* dst, BYTE* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock32u_sse2(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock16u_sse2(BYTE* dst, BYTE* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock8u_sse2(BYTE* dst, BYTE* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock4u_sse2(BYTE* dst, BYTE* src, int srcpitch); +extern void __fastcall unSwizzleBlock32_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock16_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock8_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock4_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock8HP_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock4HLP_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock4HHP_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall unSwizzleBlock4P_c(BYTE* src, BYTE* dst, int dstpitch); +extern void __fastcall SwizzleBlock32_c(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff); +extern void __fastcall SwizzleBlock16_c(BYTE* dst, BYTE* src, int srcpitch); +extern void __fastcall SwizzleBlock8_c(BYTE* dst, BYTE* src, int srcpitch); +extern void __fastcall SwizzleBlock4_c(BYTE* dst, BYTE* src, int srcpitch); + +extern void __fastcall SwizzleColumn32_c(int y, BYTE* dst, BYTE* 
src, int srcpitch, DWORD WriteMask = 0xffffffff); +extern void __fastcall SwizzleColumn16_c(int y, BYTE* dst, BYTE* src, int srcpitch); +extern void __fastcall SwizzleColumn8_c(int y, BYTE* dst, BYTE* src, int srcpitch); +extern void __fastcall SwizzleColumn4_c(int y, BYTE* dst, BYTE* src, int srcpitch); + +extern void __fastcall ExpandBlock24_sse2(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA); +extern void __fastcall ExpandBlock16_sse2(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA); +extern void __fastcall Expand16_sse2(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA); +extern void __fastcall ExpandBlock24_c(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA); +extern void __fastcall ExpandBlock16_c(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA); +extern void __fastcall Expand16_c(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA); + +extern "C" void SaturateColor_amd64(int* c); +extern "C" void __fastcall SaturateColor_sse2(int* c); +extern "C" void __fastcall SaturateColor_asm(int* c); + +struct uvmm_t {float umin, vmin, umax, vmax;}; +struct vertex_t {float xyzw[4]; DWORD color[2]; float u, v;}; +extern "C" void __fastcall UVMinMax_sse2(int nVertices, vertex_t* pVertices, uvmm_t* uv); +extern "C" void __fastcall UVMinMax_c(int nVertices, vertex_t* pVertices, uvmm_t* uv); + +extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(WORD* vm, WORD* clut); +extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(DWORD* vm, WORD* clut); +extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(WORD* vm, WORD* clut); +extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(DWORD* vm, WORD* clut); +extern void __fastcall WriteCLUT_T16_I8_CSM1_c(WORD* vm, WORD* clut); +extern void __fastcall WriteCLUT_T32_I8_CSM1_c(DWORD* vm, WORD* clut); +extern void __fastcall WriteCLUT_T16_I4_CSM1_c(WORD* vm, WORD* clut); +extern void __fastcall WriteCLUT_T32_I4_CSM1_c(DWORD* vm, WORD* clut); + +extern "C" void __fastcall ReadCLUT32_T32_I8_sse2(WORD* src, 
DWORD* dst); +extern "C" void __fastcall ReadCLUT32_T32_I4_sse2(WORD* src, DWORD* dst); +extern "C" void __fastcall ReadCLUT32_T16_I8_sse2(WORD* src, DWORD* dst); +extern "C" void __fastcall ReadCLUT32_T16_I4_sse2(WORD* src, DWORD* dst); +extern void __fastcall ReadCLUT32_T32_I8_c(WORD* src, DWORD* dst); +extern void __fastcall ReadCLUT32_T32_I4_c(WORD* src, DWORD* dst); +extern void __fastcall ReadCLUT32_T16_I8_c(WORD* src, DWORD* dst); +extern void __fastcall ReadCLUT32_T16_I4_c(WORD* src, DWORD* dst); + +#ifdef _M_AMD64 + +#define SaturateColor SaturateColor_amd64 + +#define unSwizzleBlock32 unSwizzleBlock32_amd64 +#define unSwizzleBlock16 unSwizzleBlock16_amd64 +#define unSwizzleBlock8 unSwizzleBlock8_amd64 +#define unSwizzleBlock4 unSwizzleBlock4_amd64 +#define unSwizzleBlock8HP unSwizzleBlock8HP_amd64 +#define unSwizzleBlock4HLP unSwizzleBlock4HLP_amd64 +#define unSwizzleBlock4HHP unSwizzleBlock4HHP_amd64 +#define unSwizzleBlock4P unSwizzleBlock4P_amd64 +#define SwizzleBlock32 SwizzleBlock32_amd64 +#define SwizzleBlock16 SwizzleBlock16_amd64 +#define SwizzleBlock8 SwizzleBlock8_amd64 +#define SwizzleBlock4 SwizzleBlock4_amd64 +#define SwizzleBlock32u SwizzleBlock32u_amd64 +#define SwizzleBlock16u SwizzleBlock16u_amd64 +#define SwizzleBlock8u SwizzleBlock8u_amd64 +#define SwizzleBlock4u SwizzleBlock4u_amd64 + +#define SwizzleColumn32 SwizzleColumn32_c +#define SwizzleColumn16 SwizzleColumn16_c +#define SwizzleColumn8 SwizzleColumn8_c +#define SwizzleColumn4 SwizzleColumn4_c + +#define ExpandBlock24 ExpandBlock24_sse2 +#define ExpandBlock16 ExpandBlock16_sse2 +#define Expand16 Expand16_sse2 + +#define UVMinMax UVMinMax_sse2 + +#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_sse2 +#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_sse2 +#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_sse2 +#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_sse2 + +#define ReadCLUT32_T32_I8 ReadCLUT32_T32_I8_sse2 +#define ReadCLUT32_T32_I4 ReadCLUT32_T32_I4_sse2 
+#define ReadCLUT32_T16_I8 ReadCLUT32_T16_I8_sse2 +#define ReadCLUT32_T16_I4 ReadCLUT32_T16_I4_sse2 + +#elif _M_IX86_FP >= 2 + +#define SaturateColor SaturateColor_sse2 + +#define unSwizzleBlock32 unSwizzleBlock32_sse2 +#define unSwizzleBlock16 unSwizzleBlock16_sse2 +#define unSwizzleBlock8 unSwizzleBlock8_sse2 +#define unSwizzleBlock4 unSwizzleBlock4_sse2 +#define unSwizzleBlock8HP unSwizzleBlock8HP_sse2 +#define unSwizzleBlock4HLP unSwizzleBlock4HLP_sse2 +#define unSwizzleBlock4HHP unSwizzleBlock4HHP_sse2 +#define unSwizzleBlock4P unSwizzleBlock4P_sse2 +#define SwizzleBlock32 SwizzleBlock32_sse2 +#define SwizzleBlock16 SwizzleBlock16_sse2 +#define SwizzleBlock8 SwizzleBlock8_sse2 +#define SwizzleBlock4 SwizzleBlock4_sse2 +#define SwizzleBlock32u SwizzleBlock32u_sse2 +#define SwizzleBlock16u SwizzleBlock16u_sse2 +#define SwizzleBlock8u SwizzleBlock8u_sse2 +#define SwizzleBlock4u SwizzleBlock4u_sse2 + +#define SwizzleColumn32 SwizzleColumn32_c +#define SwizzleColumn16 SwizzleColumn16_c +#define SwizzleColumn8 SwizzleColumn8_c +#define SwizzleColumn4 SwizzleColumn4_c +#define SwizzleColumn4h SwizzleColumn4h_c + +#define ExpandBlock24 ExpandBlock24_sse2 +#define ExpandBlock16 ExpandBlock16_sse2 +#define Expand16 Expand16_sse2 + +#define UVMinMax UVMinMax_sse2 + +#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_sse2 +#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_sse2 +#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_sse2 +#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_sse2 + +#define ReadCLUT32_T32_I8 ReadCLUT32_T32_I8_sse2 +#define ReadCLUT32_T32_I4 ReadCLUT32_T32_I4_sse2 +#define ReadCLUT32_T16_I8 ReadCLUT32_T16_I8_sse2 +#define ReadCLUT32_T16_I4 ReadCLUT32_T16_I4_sse2 + +#else + +#define SaturateColor SaturateColor_asm + +#define unSwizzleBlock32 unSwizzleBlock32_c +#define unSwizzleBlock16 unSwizzleBlock16_c +#define unSwizzleBlock8 unSwizzleBlock8_c +#define unSwizzleBlock4 unSwizzleBlock4_c +#define unSwizzleBlock8HP unSwizzleBlock8HP_c +#define 
unSwizzleBlock4HLP unSwizzleBlock4HLP_c +#define unSwizzleBlock4HHP unSwizzleBlock4HHP_c +#define unSwizzleBlock4P unSwizzleBlock4P_c +#define SwizzleBlock32 SwizzleBlock32_c +#define SwizzleBlock16 SwizzleBlock16_c +#define SwizzleBlock8 SwizzleBlock8_c +#define SwizzleBlock4 SwizzleBlock4_c +#define SwizzleBlock32u SwizzleBlock32_c +#define SwizzleBlock16u SwizzleBlock16_c +#define SwizzleBlock8u SwizzleBlock8_c +#define SwizzleBlock4u SwizzleBlock4_c + +#define SwizzleColumn32 SwizzleColumn32_c +#define SwizzleColumn16 SwizzleColumn16_c +#define SwizzleColumn8 SwizzleColumn8_c +#define SwizzleColumn4 SwizzleColumn4_c + +#define ExpandBlock24 ExpandBlock24_c +#define ExpandBlock16 ExpandBlock16_c +#define Expand16 Expand16_c + +#define UVMinMax UVMinMax_c + +#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_c +#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_c +#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_c +#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_c + +#define ReadCLUT32_T32_I8 ReadCLUT32_T32_I8_c +#define ReadCLUT32_T32_I4 ReadCLUT32_T32_I4_c +#define ReadCLUT32_T16_I8 ReadCLUT32_T16_I8_c +#define ReadCLUT32_T16_I4 ReadCLUT32_T16_I4_c + +#endif diff --git a/gsdx10/GS.cpp b/gsdx10/GS.cpp new file mode 100644 index 0000000..4a5b36b --- /dev/null +++ b/gsdx10/GS.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSdx10.h" +#include "GS.h" +#include "GSRendererHW.h" +#include "GSRendererSW.h" +#include "GSRendererNull.h" +#include "GSSettingsDlg.h" + diff --git a/gsdx10/GSDepthStencil.cpp b/gsdx10/GSDepthStencil.cpp new file mode 100644 index 0000000..7b977cc --- /dev/null +++ b/gsdx10/GSDepthStencil.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTextureCache.h" +#include "GSRendererHW.h" + +GSTextureCache::GSDepthStencil::GSDepthStencil(GSTextureCache* tc) + : GSSurface(tc) + , m_used(false) +{ +} + +bool GSTextureCache::GSDepthStencil::Create(int w, int h) +{ + HRESULT hr; + + hr = m_tc->m_renderer->m_dev.CreateDepthStencil(m_texture, w, h); + + return SUCCEEDED(hr); +} + +void GSTextureCache::GSDepthStencil::Update() +{ + __super::Update(); + + // TODO: dx 10.1 could update ds +} + diff --git a/gsdx10/GSDevice.cpp b/gsdx10/GSDevice.cpp new file mode 100644 index 0000000..5b75e4b --- /dev/null +++ b/gsdx10/GSDevice.cpp @@ -0,0 +1,775 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSDevice.h" +#include "resource.h" + +GSDevice::GSDevice() + : m_vb(NULL) + , m_vb_stride(0) + , m_layout(NULL) + , m_topology(D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED) + , m_vs(NULL) + , m_vs_cb(NULL) + , m_gs(NULL) + , m_ps(NULL) + , m_ps_ss(NULL) + , m_scissor(0, 0, 0, 0) + , m_viewport(0, 0) + , m_dss(NULL) + , m_sref(0) + , m_bs(NULL) + , m_bf(-1) + , m_rtv(NULL) + , m_dsv(NULL) +{ + memset(m_ps_srvs, 0, sizeof(m_ps_srvs)); +} + +GSDevice::~GSDevice() +{ +} + +bool GSDevice::Create(HWND hWnd) +{ + HRESULT hr; + + DXGI_SWAP_CHAIN_DESC scd; + + memset(&scd, 0, sizeof(scd)); + + scd.BufferCount = 2; + scd.BufferDesc.Width = 1; + scd.BufferDesc.Height = 1; + scd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + // scd.BufferDesc.RefreshRate.Numerator = 60; + // scd.BufferDesc.RefreshRate.Denominator = 1; + scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + scd.OutputWindow = hWnd; + scd.SampleDesc.Count = 1; + scd.SampleDesc.Quality = 0; + scd.Windowed = TRUE; + + UINT flags = 0; + +#ifdef DEBUG + flags |= D3D10_CREATE_DEVICE_DEBUG; +#endif + + hr = D3D10CreateDeviceAndSwapChain(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, flags, D3D10_SDK_VERSION, &scd, &m_swapchain, &m_dev); + + if(FAILED(hr)) return false; + + D3D10_BUFFER_DESC bd; + D3D10_SAMPLER_DESC sd; + D3D10_DEPTH_STENCIL_DESC dsd; + D3D10_RASTERIZER_DESC rd; + D3D10_BLEND_DESC bsd; + + // convert + + D3D10_INPUT_ELEMENT_DESC il_convert[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0}, + }; + + hr = CompileShader(&m_convert.vs, IDR_CONVERT_FX, "vs_main", il_convert, countof(il_convert), &m_convert.il); + + for(int i = 0; i < countof(m_convert.ps); i++) + { + CStringA main; + main.Format("ps_main%d", i); + hr = CompileShader(&m_convert.ps[i], IDR_CONVERT_FX, main); + } + + memset(&bd, 0, sizeof(bd)); + + 
bd.Usage = D3D10_USAGE_DEFAULT; + bd.BindFlags = D3D10_BIND_VERTEX_BUFFER; + bd.CPUAccessFlags = 0; + bd.MiscFlags = 0; + bd.ByteWidth = 4 * sizeof(VertexPT1); + + hr = m_dev->CreateBuffer(&bd, NULL, &m_convert.vb); + + memset(&dsd, 0, sizeof(dsd)); + + dsd.DepthEnable = false; + dsd.StencilEnable = false; + + hr = m_dev->CreateDepthStencilState(&dsd, &m_convert.dss); + + memset(&bsd, 0, sizeof(bsd)); + + bsd.RenderTargetWriteMask[0] = D3D10_COLOR_WRITE_ENABLE_ALL; + bsd.BlendEnable[0] = false; + + hr = m_dev->CreateBlendState(&bsd, &m_convert.bs); + + // merge + + D3D10_INPUT_ELEMENT_DESC il_merge[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, 24, D3D10_INPUT_PER_VERTEX_DATA, 0}, + }; + + hr = CompileShader(&m_merge.vs, IDR_MERGE_FX, "vs_main", il_merge, countof(il_merge), &m_merge.il); + hr = CompileShader(&m_merge.ps, IDR_MERGE_FX, "ps_main"); + + memset(&bd, 0, sizeof(bd)); + + bd.ByteWidth = sizeof(MergeCB); + bd.Usage = D3D10_USAGE_DYNAMIC; // TODO: default + bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER; + bd.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE; + bd.MiscFlags = 0; + + hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.cb); + + memset(&bd, 0, sizeof(bd)); + + bd.Usage = D3D10_USAGE_DEFAULT; + bd.BindFlags = D3D10_BIND_VERTEX_BUFFER; + bd.CPUAccessFlags = 0; + bd.MiscFlags = 0; + bd.ByteWidth = 4 * sizeof(VertexPT2); + + hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.vb); + + // interlace + + memset(&bd, 0, sizeof(bd)); + + bd.ByteWidth = sizeof(InterlaceCB); + bd.Usage = D3D10_USAGE_DEFAULT; + bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER; + bd.CPUAccessFlags = 0; + bd.MiscFlags = 0; + + hr = m_dev->CreateBuffer(&bd, NULL, &m_interlace.cb); + + for(int i = 0; i < countof(m_interlace.ps); i++) + { + CStringA main; + main.Format("ps_main%d", i); + hr = CompileShader(&m_interlace.ps[i], 
IDR_INTERLACE_FX, main); + } + + // + + memset(&rd, 0, sizeof(rd)); + + rd.FillMode = D3D10_FILL_SOLID; + rd.CullMode = D3D10_CULL_NONE; + rd.FrontCounterClockwise = false; + rd.DepthBias = false; + rd.DepthBiasClamp = 0; + rd.SlopeScaledDepthBias = 0; + rd.DepthClipEnable = false; // ??? + rd.ScissorEnable = true; + rd.MultisampleEnable = false; + rd.AntialiasedLineEnable = false; + + hr = m_dev->CreateRasterizerState(&rd, &m_rs); + + m_dev->RSSetState(m_rs); + + // + + memset(&sd, 0, sizeof(sd)); + + sd.AddressU = D3D10_TEXTURE_ADDRESS_CLAMP; + sd.AddressV = D3D10_TEXTURE_ADDRESS_CLAMP; + sd.AddressW = D3D10_TEXTURE_ADDRESS_CLAMP; + sd.MaxLOD = FLT_MAX; + sd.MaxAnisotropy = 16; + sd.ComparisonFunc = D3D10_COMPARISON_NEVER; + + sd.Filter = D3D10_FILTER_MIN_MAG_MIP_LINEAR; + + hr = m_dev->CreateSamplerState(&sd, &m_ss_linear); + + sd.Filter = D3D10_FILTER_MIN_MAG_MIP_POINT; + + hr = m_dev->CreateSamplerState(&sd, &m_ss_point); + + // + + ResetDevice(1, 1); + + // + + return true; +} + +void GSDevice::ResetDevice(int w, int h) +{ + m_backbuffer = NULL; + + m_tex_1x1 = GSTexture2D(); + m_tex_merge = GSTexture2D(); + m_tex_interlace = GSTexture2D(); + m_tex_deinterlace = GSTexture2D(); + m_tex_current = GSTexture2D(); + + m_vb = NULL; + m_layout = NULL; + + // + + DXGI_SWAP_CHAIN_DESC scd; + memset(&scd, 0, sizeof(scd)); + m_swapchain->GetDesc(&scd); + m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0); + m_swapchain->GetBuffer(0, __uuidof(ID3D10Texture2D), (void**)&m_backbuffer); + + // + + CreateTexture(m_tex_1x1, 1, 1); +} + +void GSDevice::Present() +{ + m_swapchain->Present(0, 0); +} + +void GSDevice::EndScene() +{ + PSSetShaderResources(NULL, NULL); + + OMSetRenderTargets(NULL, NULL); +} + +void GSDevice::IASet(ID3D10Buffer* vb, UINT count, const void* vertices, UINT stride, ID3D10InputLayout* layout, D3D10_PRIMITIVE_TOPOLOGY topology) +{ + D3D10_BOX box = {0, 0, 0, count * stride, 1, 1}; + + m_dev->UpdateSubresource(vb, 0, &box, 
vertices, 0, 0); + + if(m_vb != vb || m_vb_stride != stride) + { + UINT offset = 0; + + m_dev->IASetVertexBuffers(0, 1, &vb, &stride, &offset); + + m_vb = vb; + m_vb_stride = stride; + } + + if(m_layout != layout) + { + m_dev->IASetInputLayout(layout); + + m_layout = layout; + } + + if(m_topology != topology) + { + m_dev->IASetPrimitiveTopology(topology); + + m_topology = topology; + } +} + +void GSDevice::VSSet(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb) +{ + if(m_vs != vs) + { + m_dev->VSSetShader(vs); + + m_vs = vs; + } + + if(m_vs_cb != vs_cb) + { + m_dev->VSSetConstantBuffers(0, 1, &vs_cb); + + m_vs_cb = vs_cb; + } +} + +void GSDevice::GSSet(ID3D10GeometryShader* gs) +{ + if(m_gs != gs) + { + m_dev->GSSetShader(gs); + + m_gs = gs; + } +} + +void GSDevice::PSSetShaderResources(ID3D10ShaderResourceView* srv0, ID3D10ShaderResourceView* srv1) +{ + if(m_ps_srvs[0] != srv0 || m_ps_srvs[1] != srv1) + { + ID3D10ShaderResourceView* srvs[] = {srv0, srv1}; + + m_dev->PSSetShaderResources(0, 2, srvs); + + m_ps_srvs[0] = srv0; + m_ps_srvs[1] = srv1; + } +} + +void GSDevice::PSSet(ID3D10PixelShader* ps, ID3D10SamplerState* ss) +{ + if(m_ps != ps) + { + m_dev->PSSetShader(ps); + + m_ps = ps; + } + + // ss = m_ss_point; + + if(m_ps_ss != ss) + { + m_dev->PSSetSamplers(0, 1, &ss); + + m_ps_ss = ss; + } +} + +void GSDevice::RSSet(int width, int height, const RECT* scissor) +{ + if(m_viewport.cx != width || m_viewport.cy != height) + { + D3D10_VIEWPORT vp; + + memset(&vp, 0, sizeof(vp)); + + vp.TopLeftX = 0; + vp.TopLeftY = 0; + vp.Width = width; + vp.Height = height; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + + m_dev->RSSetViewports(1, &vp); + + m_viewport = CSize(width, height); + } + + CRect r = scissor ? 
*scissor : CRect(0, 0, width, height); + + if(m_scissor != r) + { + m_dev->RSSetScissorRects(1, &r); + + m_scissor = r; + } +} + +void GSDevice::OMSet(ID3D10DepthStencilState* dss, UINT sref, ID3D10BlendState* bs, float bf) +{ + if(m_dss != dss || m_sref != sref) + { + m_dev->OMSetDepthStencilState(dss, sref); + + m_dss = dss; + m_sref = sref; + } + + if(m_bs != bs || m_bf != bf) + { + float BlendFactor[] = {bf, bf, bf, 0}; + + m_dev->OMSetBlendState(bs, BlendFactor, 0xffffffff); + + m_bs = bs; + m_bf = bf; + } +} + +void GSDevice::OMSetRenderTargets(ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv) +{ + if(m_rtv != rtv || m_dsv != dsv) + { + m_dev->OMSetRenderTargets(1, &rtv, dsv); + + m_rtv = rtv; + m_dsv = dsv; + } +} + +HRESULT GSDevice::CreateRenderTarget(GSTexture2D& t, int w, int h, DXGI_FORMAT format) +{ + return Create(t, w, h, format, D3D10_USAGE_DEFAULT, D3D10_BIND_RENDER_TARGET | D3D10_BIND_SHADER_RESOURCE); +} + +HRESULT GSDevice::CreateDepthStencil(GSTexture2D& t, int w, int h, DXGI_FORMAT format) +{ + return Create(t, w, h, format, D3D10_USAGE_DEFAULT, D3D10_BIND_DEPTH_STENCIL); +} + +HRESULT GSDevice::CreateTexture(GSTexture2D& t, int w, int h, DXGI_FORMAT format) +{ + return Create(t, w, h, format, D3D10_USAGE_DEFAULT, D3D10_BIND_SHADER_RESOURCE); +} + +HRESULT GSDevice::CreateOffscreenPlainSurface(GSTexture2D& t, int w, int h, DXGI_FORMAT format) +{ + return Create(t, w, h, format, D3D10_USAGE_STAGING, 0); +} + +HRESULT GSDevice::Create(GSTexture2D& t, int w, int h, DXGI_FORMAT format, D3D10_USAGE usage, UINT bindFlags) +{ + HRESULT hr; + + Recycle(t); + + for(POSITION pos = m_pool.GetHeadPosition(); pos; m_pool.GetNext(pos)) + { + const GSTexture2D& t2 = m_pool.GetAt(pos); + + if(t2.m_desc.Usage == usage && t2.m_desc.BindFlags == bindFlags && t2.m_desc.Width == w && t2.m_desc.Height == h && t2.m_desc.Format == format) + { + t = t2; + + m_pool.RemoveAt(pos); + + return S_OK; + } + } + + D3D10_TEXTURE2D_DESC desc; + + memset(&desc, 0, 
sizeof(desc)); + + desc.Width = w; + desc.Height = h; + desc.Format = format; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = usage; + desc.BindFlags = bindFlags; + desc.CPUAccessFlags = + usage == D3D10_USAGE_STAGING ? (D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE) : + usage == D3D10_USAGE_DYNAMIC ? (D3D10_CPU_ACCESS_WRITE) : + 0; + + CComPtr texture; + + hr = m_dev->CreateTexture2D(&desc, NULL, &texture); + + if(SUCCEEDED(hr)) + { + t.m_dev = m_dev; + t.m_texture = texture.Detach(); + t.m_desc = desc; + } + +//_tprintf(_T("Create %d x %d (%d %d %d) => %08x (%d)\n"), w, h, usage, bindFlags, format, hr, m_pool.GetCount()); + + return hr; +} + +void GSDevice::Recycle(GSTexture2D& t) +{ + if(t.m_texture) + { + m_pool.AddHead(t); + + while(m_pool.GetCount() > 200) + { +//_tprintf(_T("Destroy %d x %d (%d)\n"), m_pool.GetTail().m_desc.Width, m_pool.GetTail().m_desc.Height, m_pool.GetCount()); + m_pool.RemoveTail(); + } + + t = GSTexture2D(); + } +} + +bool GSDevice::SaveCurrent(LPCTSTR fn) +{ + return SUCCEEDED(D3DX10SaveTextureToFile(m_tex_current, D3DX10_IFF_BMP, fn)); +} + +bool GSDevice::SaveToFileD32S8X24(ID3D10Texture2D* ds, LPCTSTR fn) +{ + HRESULT hr; + + D3D10_TEXTURE2D_DESC desc; + + memset(&desc, 0, sizeof(desc)); + + ds->GetDesc(&desc); + + desc.Usage = D3D10_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ; + + CComPtr src, dst; + + hr = m_dev->CreateTexture2D(&desc, NULL, &src); + + m_dev->CopyResource(src, ds); + + desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE; + + hr = m_dev->CreateTexture2D(&desc, NULL, &dst); + + D3D10_MAPPED_TEXTURE2D sm, dm; + + hr = src->Map(0, D3D10_MAP_READ, 0, &sm); + + hr = dst->Map(0, D3D10_MAP_WRITE, 0, &dm); + + BYTE* s = (BYTE*)sm.pData; + BYTE* d = (BYTE*)dm.pData; + + for(int y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch) + { + float* sf = (float*)s; + 
DWORD* dd = (DWORD*)d; + + for(int x = 0; x < desc.Width; x++) + { + BYTE b = (BYTE)(sf[x*2] * 255); + + dd[x] = (b << 24) | (b << 16) | (b << 8) | 0xff; + } + } + + src->Unmap(0); + + dst->Unmap(0); + + return SUCCEEDED(D3DX10SaveTextureToFile(dst, D3DX10_IFF_BMP, fn)); +} + +void GSDevice::StretchRect(GSTexture2D& st, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear) +{ + StretchRect(st, D3DXVECTOR4(0, 0, 1, 1), dt, dr, m_convert.ps[0], linear); +} + +void GSDevice::StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear) +{ + StretchRect(st, sr, dt, dr, m_convert.ps[0], linear); +} + +void GSDevice::StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, ID3D10PixelShader* ps, bool linear) +{ + // om + + OMSet(m_convert.dss, 0, m_convert.bs, 0); + + OMSetRenderTargets(dt, NULL); + + // ia + + float left = dr.x * 2 / dt.m_desc.Width - 1.0f; + float top = 1.0f - dr.y * 2 / dt.m_desc.Height; + float right = dr.z * 2 / dt.m_desc.Width - 1.0f; + float bottom = 1.0f - dr.w * 2 / dt.m_desc.Height; + + VertexPT1 vertices[] = + { + {left, top, 0.5f, 1.0f, sr.x, sr.y}, + {right, top, 0.5f, 1.0f, sr.z, sr.y}, + {left, bottom, 0.5f, 1.0f, sr.x, sr.w}, + {right, bottom, 0.5f, 1.0f, sr.z, sr.w}, + }; + + IASet(m_convert.vb, 4, vertices, m_convert.il, D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + // vs + + VSSet(m_convert.vs, NULL); + + // gs + + GSSet(NULL); + + // ps + + PSSetShaderResources(st, NULL); + + PSSet(ps, linear ? 
m_ss_linear : m_ss_point); + + // rs + + RSSet(dt.m_desc.Width, dt.m_desc.Height); + + // + + m_dev->Draw(4, 0); + + EndScene(); +} + +void GSDevice::Interlace(GSTexture2D& st, GSTexture2D& dt, int shader, bool linear, float yoffset) +{ + InterlaceCB cb; + + cb.ZrH = D3DXVECTOR2(0, 1.0f / dt.m_desc.Height); + cb.hH = (float)dt.m_desc.Height / 2; + + m_dev->UpdateSubresource(m_interlace.cb, 0, NULL, &cb, 0, 0); + + m_dev->PSSetConstantBuffers(0, 1, &m_interlace.cb.p); + + D3DXVECTOR4 sr(0, 0, 1, 1); + D3DXVECTOR4 dr(0, yoffset, (float)dt.m_desc.Width, (float)dt.m_desc.Height + yoffset); + + StretchRect(st, sr, dt, dr, m_interlace.ps[shader], linear); +} + +ID3D10Texture2D* GSDevice::Interlace(GSTexture2D& st, CSize ds, int field, int mode, float yoffset) +{ + ID3D10Texture2D* t = st; + + if(!m_tex_interlace || m_tex_interlace.m_desc.Width != ds.cx || m_tex_interlace.m_desc.Height != ds.cy) + { + CreateRenderTarget(m_tex_interlace, ds.cx, ds.cy); + } + + if(mode == 0 || mode == 2) // weave or blend + { + // weave first + + Interlace(m_tex_merge, m_tex_interlace, field, false); + + t = m_tex_interlace; + + if(mode == 2) + { + // blend + + if(!m_tex_deinterlace || m_tex_deinterlace.m_desc.Width != ds.cx || m_tex_deinterlace.m_desc.Height != ds.cy) + { + CreateRenderTarget(m_tex_deinterlace, ds.cx, ds.cy); + } + + if(field == 0) return NULL; + + Interlace(m_tex_interlace, m_tex_deinterlace, 2, false); + + t = m_tex_deinterlace; + } + } + else if(mode == 1) // bob + { + Interlace(m_tex_merge, m_tex_interlace, 3, true, yoffset * field); + + t = m_tex_interlace; + } + + return t; +} + +HRESULT GSDevice::CompileShader(ID3D10VertexShader** ps, UINT id, LPCSTR entry, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** pl, D3D10_SHADER_MACRO* macro) +{ + HRESULT hr; + + CComPtr shader, error; + + hr = D3DX10CompileFromResource(AfxGetInstanceHandle(), MAKEINTRESOURCE(id), NULL, macro, NULL, entry, "vs_4_0", 0, 0, NULL, &shader, &error, NULL); + + if(error) + { + 
TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); + } + + if(FAILED(hr)) + { + return hr; + } + + hr = m_dev->CreateVertexShader((DWORD*)shader->GetBufferPointer(), shader->GetBufferSize(), ps); + + if(FAILED(hr)) + { + return hr; + } + + hr = m_dev->CreateInputLayout(layout, count, shader->GetBufferPointer(), shader->GetBufferSize(), pl); + + if(FAILED(hr)) + { + return hr; + } + + return hr; +} + +HRESULT GSDevice::CompileShader(ID3D10GeometryShader** gs, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro) +{ + HRESULT hr; + + CComPtr shader, error; + + hr = D3DX10CompileFromResource(AfxGetInstanceHandle(), MAKEINTRESOURCE(id), NULL, macro, NULL, entry, "gs_4_0", 0, 0, NULL, &shader, &error, NULL); + + if(error) + { + TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); + } + + if(FAILED(hr)) + { + return hr; + } + + hr = m_dev->CreateGeometryShader((DWORD*)shader->GetBufferPointer(), shader->GetBufferSize(), gs); + + if(FAILED(hr)) + { + return hr; + } + + return hr; +} + +HRESULT GSDevice::CompileShader(ID3D10PixelShader** ps, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro) +{ + HRESULT hr; + + CComPtr shader, error; + + hr = D3DX10CompileFromResource(AfxGetInstanceHandle(), MAKEINTRESOURCE(id), NULL, macro, NULL, entry, "ps_4_0", 0, 0, NULL, &shader, &error, NULL); + + if(error) + { + TRACE(_T("%s\n"), CString((LPCSTR)error->GetBufferPointer())); + } + + if(FAILED(hr)) + { + return hr; + } + + hr = m_dev->CreatePixelShader((DWORD*)shader->GetBufferPointer(), shader->GetBufferSize(), ps); + + if(FAILED(hr)) + { + return hr; + } + + return hr; +} diff --git a/gsdx10/GSDevice.h b/gsdx10/GSDevice.h new file mode 100644 index 0000000..a2743fe --- /dev/null +++ b/gsdx10/GSDevice.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; 
either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSTexture2D.h" + +#pragma pack(push, 1) + +struct MergeCB +{ + D3DXVECTOR4 BGColor; + float Alpha; + float EN1; + float EN2; + int MMOD; + int SLBG; + float Padding[3]; +}; + +struct InterlaceCB +{ + D3DXVECTOR2 ZrH; + float hH; + float _pad; +}; + +struct VertexPT1 +{ + float x, y, z, w; + float tu, tv; +}; + +struct VertexPT2 +{ + float x, y, z, w; + float tu1, tv1; + float tu2, tv2; +}; + +#pragma pack(pop) + +class GSDevice +{ + // texture cache + + CAtlList m_pool; + + // state cache + + ID3D10Buffer* m_vb; + UINT m_vb_stride; + + ID3D10InputLayout* m_layout; + D3D10_PRIMITIVE_TOPOLOGY m_topology; + + ID3D10VertexShader* m_vs; + ID3D10Buffer* m_vs_cb; + + ID3D10GeometryShader* m_gs; + + ID3D10ShaderResourceView* m_ps_srvs[2]; + + ID3D10PixelShader* m_ps; + ID3D10SamplerState* m_ps_ss; + + CSize m_viewport; + CRect m_scissor; + + ID3D10DepthStencilState* m_dss; + UINT m_sref; + ID3D10BlendState* m_bs; + float m_bf; + ID3D10RenderTargetView* m_rtv; + ID3D10DepthStencilView* m_dsv; + + // + + void Interlace(GSTexture2D& st, GSTexture2D& dt, int shader, bool linear, float yoffset = 0); + +public: // TODO + CComPtr m_dev; + CComPtr m_swapchain; + CComPtr m_backbuffer; + CComPtr m_tex_current; + + GSTexture2D m_tex_merge; + GSTexture2D m_tex_interlace; + GSTexture2D m_tex_deinterlace; + GSTexture2D m_tex_1x1; + + CComPtr m_ss_linear; + CComPtr m_ss_point; + + CComPtr m_rs; 
+ + struct + { + CComPtr vb; + CComPtr il; + CComPtr vs; + CComPtr ps[4]; + CComPtr dss; + CComPtr bs; + } m_convert; + + struct + { + CComPtr vb; + CComPtr il; + CComPtr vs; + CComPtr ps; + CComPtr cb; + } m_merge; + + struct + { + CComPtr ps[4]; + CComPtr cb; + } m_interlace; + +public: + GSDevice(); + virtual ~GSDevice(); + + bool Create(HWND hWnd); + + ID3D10Device* operator->() {return m_dev;} + operator ID3D10Device*() {return m_dev;} + + void ResetDevice(int w, int h); + void EndScene(); + void Present(); + + void IASet(ID3D10Buffer* vb, UINT count, const void* vertices, UINT stride, ID3D10InputLayout* layout, D3D10_PRIMITIVE_TOPOLOGY topology); + void VSSet(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb); + void GSSet(ID3D10GeometryShader* gs); + void PSSetShaderResources(ID3D10ShaderResourceView* srv0, ID3D10ShaderResourceView* srv1); + void PSSet(ID3D10PixelShader* ps, ID3D10SamplerState* ss); + void RSSet(int width, int height, const RECT* scissor = NULL); + void OMSet(ID3D10DepthStencilState* dss, UINT sref, ID3D10BlendState* bs, float bf); + void OMSetRenderTargets(ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv); + + template void IASet(ID3D10Buffer* vb, UINT count, T* vertices, ID3D10InputLayout* layout, D3D10_PRIMITIVE_TOPOLOGY topology) + { + IASet(vb, count, vertices, sizeof(T), layout, topology); + } + + HRESULT CreateRenderTarget(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM); + HRESULT CreateDepthStencil(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + HRESULT CreateTexture(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM); + HRESULT CreateOffscreenPlainSurface(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM); + HRESULT Create(GSTexture2D& t, int w, int h, DXGI_FORMAT format, D3D10_USAGE usage, UINT bind); + + void Recycle(GSTexture2D& t); + + bool SaveCurrent(LPCTSTR fn); + bool 
SaveToFileD32S8X24(ID3D10Texture2D* ds, LPCTSTR fn); + + void StretchRect(GSTexture2D& st, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear = true); + void StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear = true); + void StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, ID3D10PixelShader* ps, bool linear = true); + + ID3D10Texture2D* Interlace(GSTexture2D& st, CSize ds, int field, int mode, float yoffset); + + HRESULT CompileShader(ID3D10VertexShader** ps, UINT id, LPCSTR entry, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** pl, D3D10_SHADER_MACRO* macro = NULL); + HRESULT CompileShader(ID3D10GeometryShader** gs, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro = NULL); + HRESULT CompileShader(ID3D10PixelShader** ps, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro = NULL); +}; diff --git a/gsdx10/GSRenderTarget.cpp b/gsdx10/GSRenderTarget.cpp new file mode 100644 index 0000000..6ba4b2b --- /dev/null +++ b/gsdx10/GSRenderTarget.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTextureCache.h" +#include "GSRendererHW.h" + +GSTextureCache::GSRenderTarget::GSRenderTarget(GSTextureCache* tc) + : GSSurface(tc) + , m_used(true) +{ +} + +bool GSTextureCache::GSRenderTarget::Create(int w, int h) +{ + HRESULT hr; + + hr = m_tc->m_renderer->m_dev.CreateRenderTarget(m_texture, w, h); + + if(FAILED(hr)) return false; + + float color[4] = {0, 0, 0, 0}; + + m_tc->m_renderer->m_dev->ClearRenderTargetView(m_texture, color); + + return true; +} + +void GSTextureCache::GSRenderTarget::Update() +{ + __super::Update(); + + // FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :) + + CRect r = m_dirty.GetDirtyRect(m_TEX0); + + m_dirty.RemoveAll(); + + if(r.IsRectEmpty()) return; + + // s->m_perfmon.Put(GSPerfMon::WriteRT, 1); + + HRESULT hr; + + if(r.right > 1024) {ASSERT(0); r.right = 1024;} + if(r.bottom > 1024) {ASSERT(0); r.bottom = 1024;} + + int w = r.Width(); + int h = r.Height(); + + static BYTE* buff = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); + static int pitch = 1024 * 4; + + GIFRegTEXA TEXA; + + TEXA.AEM = 1; + TEXA.TA0 = 0; + TEXA.TA1 = 0x80; + + GIFRegCLAMP CLAMP; + + CLAMP.WMS = 0; + CLAMP.WMT = 0; + + m_tc->m_renderer->m_mem.ReadTexture(r, buff, pitch, m_TEX0, TEXA, CLAMP); + + // s->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); + + GSTexture2D texture; + + hr = m_tc->m_renderer->m_dev.CreateTexture(texture, w, h); + + if(FAILED(hr)) return; + + D3D10_BOX box = {0, 0, 0, w, h, 1}; + + m_tc->m_renderer->m_dev->UpdateSubresource(texture, 0, &box, buff, pitch, 0); + + D3DXVECTOR4 dst(m_scale.x * r.left, m_scale.y * r.top, m_scale.x * r.right, m_scale.y * r.bottom); + + m_tc->m_renderer->m_dev.StretchRect(texture, m_texture, dst); + + m_tc->m_renderer->m_dev.Recycle(texture); +} + +void GSTextureCache::GSRenderTarget::Read(CRect r) +{ + HRESULT hr; + + if(m_TEX0.PSM != PSM_PSMCT32 + && m_TEX0.PSM != PSM_PSMCT24 
+ && m_TEX0.PSM != PSM_PSMCT16 + && m_TEX0.PSM != PSM_PSMCT16S) + { + //ASSERT(0); + return; + } + + TRACE(_T("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n"), r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); + + // m_tc->m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + + // + + float left = m_scale.x * r.left / m_texture.m_desc.Width; + float top = m_scale.y * r.top / m_texture.m_desc.Height; + float right = m_scale.x * r.right / m_texture.m_desc.Width; + float bottom = m_scale.y * r.bottom / m_texture.m_desc.Height; + + D3DXVECTOR4 src(left, top, right, bottom); + D3DXVECTOR4 dst(0, 0, r.Width(), r.Height()); + + DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + + int shader = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? 1 : 0; + + GSTexture2D rt; + + hr = m_tc->m_renderer->m_dev.CreateRenderTarget(rt, r.Width(), r.Height(), format); + + m_tc->m_renderer->m_dev.StretchRect(m_texture, src, rt, dst, m_tc->m_renderer->m_dev.m_convert.ps[shader]); + + GSTexture2D offscreen; + + hr = m_tc->m_renderer->m_dev.CreateOffscreenPlainSurface(offscreen, r.Width(), r.Height(), format); + + m_tc->m_renderer->m_dev->CopyResource(offscreen, rt); + + m_tc->m_renderer->m_dev.Recycle(rt); + + D3D10_MAPPED_TEXTURE2D map; + + if(SUCCEEDED(hr) && SUCCEEDED(offscreen->Map(0, D3D10_MAP_READ, 0, &map))) + { + // TODO: block level write + + DWORD bp = m_TEX0.TBP0; + DWORD bw = m_TEX0.TBW; + + GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[m_TEX0.PSM].pa; + + BYTE* bits = (BYTE*)map.pData; + + if(m_TEX0.PSM == PSM_PSMCT32) + { + for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch) + { + DWORD addr = pa(0, y, bp, bw); + int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7]; + + for(int x = r.left, i = 0; x < r.right; x++, i++) + { + m_tc->m_renderer->m_mem.writePixel32(addr + offset[x], ((DWORD*)bits)[i]); + } + } + } + else if(m_TEX0.PSM == PSM_PSMCT24) + { + for(int y = 
r.top; y < r.bottom; y++, bits += map.RowPitch) + { + DWORD addr = pa(0, y, bp, bw); + int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7]; + + for(int x = r.left, i = 0; x < r.right; x++, i++) + { + m_tc->m_renderer->m_mem.writePixel24(addr + offset[x], ((DWORD*)bits)[i]); + } + } + } + else if(m_TEX0.PSM == PSM_PSMCT16) + { + for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch) + { + DWORD addr = pa(0, y, bp, bw); + int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7]; + + for(int x = r.left, i = 0; x < r.right; x++, i++) + { + m_tc->m_renderer->m_mem.writePixel16(addr + offset[x], ((WORD*)bits)[i]); + } + } + } + else if(m_TEX0.PSM == PSM_PSMCT16S) + { + for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch) + { + DWORD addr = pa(0, y, bp, bw); + int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7]; + + for(int x = r.left, i = 0; x < r.right; x++, i++) + { + m_tc->m_renderer->m_mem.writePixel16S(addr + offset[x], ((WORD*)bits)[i]); + } + } + } + else + { + ASSERT(0); + } + + offscreen->Unmap(0); + } + + m_tc->m_renderer->m_dev.Recycle(offscreen); +} diff --git a/gsdx10/GSRenderer.cpp b/gsdx10/GSRenderer.cpp new file mode 100644 index 0000000..62145e0 --- /dev/null +++ b/gsdx10/GSRenderer.cpp @@ -0,0 +1,466 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. 
If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GSRenderer.h" +#include "GSSettingsDlg.h" + +BEGIN_MESSAGE_MAP(GSRenderer, CWnd) + ON_WM_CLOSE() +END_MESSAGE_MAP() + +GSRenderer::GSRenderer(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : GSState(base, mt, irq, nloophack) + , m_osd(true) + , m_field(0) + , m_crc(0) + , m_options(0) + , m_frameskip(0) +{ + m_interlace = AfxGetApp()->GetProfileInt(_T("Settings"), _T("interlace"), 0); + m_aspectratio = AfxGetApp()->GetProfileInt(_T("Settings"), _T("aspectratio"), 1); + m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1); + m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); +} + +GSRenderer::~GSRenderer() +{ + DestroyWindow(); +} + +bool GSRenderer::Create(LPCTSTR title) +{ + CRect r; + + GetDesktopWindow()->GetWindowRect(r); + + CSize s(r.Width() / 3, r.Width() / 4); + + r = CRect(r.CenterPoint() - CSize(s.cx / 2, s.cy / 2), s); + + LPCTSTR wc = AfxRegisterWndClass(CS_VREDRAW|CS_HREDRAW|CS_DBLCLKS, AfxGetApp()->LoadStandardCursor(IDC_ARROW), 0, 0); + + if(!CreateEx(0, wc, title, WS_OVERLAPPEDWINDOW, r, NULL, 0)) + { + return false; + } + + if(!m_dev.Create(m_hWnd)) + { + return false; + } + + Reset(); + + return true; +} + +void GSRenderer::Show() +{ + SetWindowPos(&wndTop, 0, 0, 0, 0, SWP_NOMOVE|SWP_NOSIZE); + SetForegroundWindow(); + ShowWindow(SW_SHOWNORMAL); +} + +void GSRenderer::Hide() +{ + ShowWindow(SW_HIDE); +} + +void GSRenderer::OnClose() +{ + Hide(); + + PostMessage(WM_QUIT); +} + +void GSRenderer::VSync(int field) +{ + m_field = !!field; + + MSG msg; + + memset(&msg, 0, sizeof(msg)); + + while(msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + { + if(msg.message == WM_KEYDOWN) + { + int step = (::GetAsyncKeyState(VK_SHIFT) & 0x80000000) ? 
-1 : 1; + + if(msg.wParam == VK_F5) + { + m_interlace = (m_interlace + 7 + step) % 7; + continue; + } + + if(msg.wParam == VK_F6) + { + m_aspectratio = (m_aspectratio + 3 + step) % 3; + continue; + } + + if(msg.wParam == VK_F7) + { + SetWindowText(_T("PCSX2")); + m_osd = !m_osd; + continue; + } + } + + TranslateMessage(&msg); + DispatchMessage(&msg); + } + + Flush(); + + Flip(); + + Present(); +} + +bool GSRenderer::MakeSnapshot(char* path) +{ + CString fn; + fn.Format(_T("%sgsdx10_%s.bmp"), CString(path), CTime::GetCurrentTime().Format(_T("%Y%m%d%H%M%S"))); + return m_dev.SaveCurrent(fn); +} + +void GSRenderer::SetGameCRC(int crc, int options) +{ + m_crc = crc; + m_options = options; + + if(AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2) == 2) + { + switch(crc) + { + case 0xa39517ab: // ffx pal/eu + case 0xa39517ae: // ffx pal/fr + case 0x941bb7d9: // ffx pal/de + case 0xa39517a9: // ffx pal/it + case 0x941bb7de: // ffx pal/es + case 0xbb3d833a: // ffx ntsc/us + case 0x6a4efe60: // ffx ntsc/j + case 0x3866ca7e: // ffx int. ntsc/asia (SLPM-67513, some kind of a asia version) + case 0x658597e2: // ffx int. ntsc/j + case 0x9aac5309: // ffx-2 pal/e + case 0x9aac530c: // ffx-2 pal/fr + case 0x9aac530a: // ffx-2 pal/fr? 
(maybe belgium or luxembourg version) + case 0x9aac530d: // ffx-2 pal/de + case 0x9aac530b: // ffx-2 pal/it + case 0x48fe0c71: // ffx-2 ntsc/us + case 0xe1fd9a2d: // ffx-2 int+lm ntsc/j + case 0xf0a6d880: // harvest moon ntsc/us + m_nloophack = true; + break; + } + } +} + +void GSRenderer::SetFrameSkip(int frameskip) +{ + if(m_frameskip != frameskip) + { + m_frameskip = frameskip; + + if(frameskip) + { + } + else + { + } + } +} + +// TODO + +void GSRenderer::FinishFlip(FlipInfo src[2]) +{ + CSize fs(0, 0); + CSize ds(0, 0); + + for(int i = 0; i < 2; i++) + { + if(src[i].t) + { + CSize s = GetFrameSize(i); + + s.cx = (int)(src[i].s.x * s.cx); + s.cy = (int)(src[i].s.y * s.cy); + + ASSERT(fs.cx == 0 || fs.cx == s.cx); + ASSERT(fs.cy == 0 || fs.cy == s.cy || fs.cy + 1 == s.cy); + + fs.cx = s.cx; + fs.cy = s.cy; + + if(SMODE2->INT && SMODE2->FFMD) s.cy *= 2; + + ASSERT(ds.cx == 0 || ds.cx == s.cx); + ASSERT(ds.cy == 0 || ds.cy == s.cy || ds.cy + 1 == s.cy); + + ds.cx = s.cx; + ds.cy = s.cy; + } + } + + if(fs.cx == 0 || fs.cy == 0) + { + return; + } + + // merge + + if(!m_dev.m_tex_merge || m_dev.m_tex_merge.m_desc.Width != fs.cx || m_dev.m_tex_merge.m_desc.Height != fs.cy) + { + m_dev.CreateRenderTarget(m_dev.m_tex_merge, fs.cx, fs.cy); + } + + Merge(src, m_dev.m_tex_merge); + + ID3D10Texture2D* current = m_dev.m_tex_merge; + + if(SMODE2->INT && m_interlace > 0) + { + int field = 1 - ((m_interlace - 1) & 1); + int mode = (m_interlace - 1) >> 1; + + current = m_dev.Interlace(m_dev.m_tex_merge, ds, m_field ^ field, mode, src[1].s.y); + + if(!current) return; + } + + m_dev.m_tex_current = current; +} + +void GSRenderer::Merge(FlipInfo src[2], GSTexture2D& dst) +{ + // om + + m_dev.OMSetRenderTargets(dst, NULL); + + m_dev.OMSet(m_dev.m_convert.dss, 0, m_dev.m_convert.bs, 0); + + // ia + + CRect r[2]; + + r[0] = GetFrameRect(0); + r[1] = GetFrameRect(1); + + VertexPT2 vertices[] = + { + {-1, +1, 0.5f, 1.0f, + src[0].s.x * r[0].left / src[0].t.m_desc.Width, src[0].s.y * 
r[0].top / src[0].t.m_desc.Height, + src[1].s.x * r[1].left / src[1].t.m_desc.Width, src[1].s.y * r[1].top / src[1].t.m_desc.Height}, + {+1, +1, 0.5f, 1.0f, + src[0].s.x * r[0].right / src[0].t.m_desc.Width, src[0].s.y * r[0].top / src[0].t.m_desc.Height, + src[1].s.x * r[1].right / src[1].t.m_desc.Width, src[1].s.y * r[1].top / src[1].t.m_desc.Height}, + {-1, -1, 0.5f, 1.0f, + src[0].s.x * r[0].left / src[0].t.m_desc.Width, src[0].s.y * r[0].bottom / src[0].t.m_desc.Height, + src[1].s.x * r[1].left / src[1].t.m_desc.Width, src[1].s.y * r[1].bottom / src[1].t.m_desc.Height}, + {+1, -1, 0.5f, 1.0f, + src[0].s.x * r[0].right / src[0].t.m_desc.Width, src[0].s.y * r[0].bottom / src[0].t.m_desc.Height, + src[1].s.x * r[1].right / src[1].t.m_desc.Width, src[1].s.y * r[1].bottom / src[1].t.m_desc.Height}, + }; + + m_dev.IASet(m_dev.m_merge.vb, 4, vertices, m_dev.m_merge.il, D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + // vs + + m_dev.VSSet(m_dev.m_merge.vs, NULL); + + // gs + + m_dev.GSSet(NULL); + + // ps + + MergeCB* cb = NULL; + + if(SUCCEEDED(m_dev.m_merge.cb->Map(D3D10_MAP_WRITE_DISCARD, NULL, (void**)&cb))) + { + cb->BGColor.x = (float)BGCOLOR->R / 255; + cb->BGColor.y = (float)BGCOLOR->G / 255; + cb->BGColor.z = (float)BGCOLOR->B / 255; + cb->BGColor.w = 0; + cb->Alpha = (float)PMODE->ALP / 255; + cb->EN1 = (float)IsEnabled(0); + cb->EN2 = (float)IsEnabled(1); + cb->MMOD = !!PMODE->MMOD; + cb->SLBG = !!PMODE->SLBG; + + m_dev.m_merge.cb->Unmap(); + } + + m_dev->PSSetConstantBuffers(0, 1, &m_dev.m_merge.cb.p); + + m_dev.PSSetShaderResources(src[0].t ? src[0].t : m_dev.m_tex_1x1, src[1].t ? 
src[1].t : m_dev.m_tex_1x1); + + m_dev.PSSet(m_dev.m_merge.ps, m_dev.m_ss_linear); + + // rs + + m_dev.RSSet(dst.m_desc.Width, dst.m_desc.Height); + + // + + m_dev->Draw(4, 0); + + m_dev.EndScene(); +} + +void GSRenderer::Present() +{ + m_perfmon.Put(GSPerfMon::Frame); + + HRESULT hr; + + CRect cr; + + GetClientRect(&cr); + + D3D10_TEXTURE2D_DESC desc; + + memset(&desc, 0, sizeof(desc)); + + m_dev.m_backbuffer->GetDesc(&desc); + + if(desc.Width != cr.Width() || desc.Height != cr.Height()) + { + // TODO: ResetDevice(); + + m_dev.ResetDevice(cr.Width(), cr.Height()); + } + + CComPtr rtv; + + hr = m_dev->CreateRenderTargetView(m_dev.m_backbuffer, NULL, &rtv.p); + + float color[4] = {0, 0, 0, 0}; + + m_dev->ClearRenderTargetView(rtv, color); + + if(m_dev.m_tex_current) + { + static int ar[][2] = {{0, 0}, {4, 3}, {16, 9}}; + + int arx = ar[m_aspectratio][0]; + int ary = ar[m_aspectratio][1]; + + CRect r = cr; + + if(arx > 0 && ary > 0) + { + if(r.Width() * ary > r.Height() * arx) + { + int w = r.Height() * arx / ary; + r.left = r.CenterPoint().x - w / 2; + if(r.left & 1) r.left++; + r.right = r.left + w; + } + else + { + int h = r.Width() * ary / arx; + r.top = r.CenterPoint().y - h / 2; + if(r.top & 1) r.top++; + r.bottom = r.top + h; + } + } + + r &= cr; + + GSTexture2D st(m_dev.m_tex_current); + GSTexture2D dt(m_dev.m_backbuffer); + D3DXVECTOR4 dr(r.left, r.top, r.right, r.bottom); + + m_dev.StretchRect(st, dt, dr); + } + + // osd + + static UINT64 s_frame = 0; + static CString s_stats; + + if(m_perfmon.GetFrame() - s_frame >= 30) + { + m_perfmon.Update(); + + s_frame = m_perfmon.GetFrame(); + + double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); + + s_stats.Format( + _T("%I64d | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d/%d | %d%% CPU | %.2f | %.2f/%.2f | %.2f"), + m_perfmon.GetFrame(), GetDisplaySize().cx, GetDisplaySize().cy, fps, (int)(100.0 * fps / GetFPS()), + SMODE2->INT ? (CString(_T("Interlaced ")) + (SMODE2->FFMD ? 
_T("(frame)") : _T("(field)"))) : _T("Progressive"), + g_interlace[m_interlace].name, + g_aspectratio[m_aspectratio].name, + (int)m_perfmon.Get(GSPerfMon::Prim), + (int)m_perfmon.Get(GSPerfMon::Draw), + m_perfmon.CPU(), + m_perfmon.Get(GSPerfMon::Swizzle) / 1024, + m_perfmon.Get(GSPerfMon::Unswizzle) / 1024, + m_perfmon.Get(GSPerfMon::Unswizzle2) / 1024, + m_perfmon.Get(GSPerfMon::Texture) / 1024 + ); + + if(m_osd) // && m_d3dpp.Windowed + { + SetWindowText(s_stats); + } + + if(m_perfmon.Get(GSPerfMon::COLCLAMP)) _tprintf(_T("*** NOT SUPPORTED: color wrap ***\n")); + if(m_perfmon.Get(GSPerfMon::PABE)) _tprintf(_T("*** NOT SUPPORTED: per pixel alpha blend ***\n")); + if(m_perfmon.Get(GSPerfMon::DATE)) _tprintf(_T("*** PERFORMANCE WARNING: destination alpha test used ***\n")); + if(m_perfmon.Get(GSPerfMon::ABE)) _tprintf(_T("*** NOT SUPPORTED: alpha blending mode ***\n")); + if(m_perfmon.Get(GSPerfMon::DepthTexture)) _tprintf(_T("*** NOT SUPPORTED: depth texture ***\n")); + } + +/* + if(m_osd && !m_d3dpp.Windowed) + { + hr = m_dev->BeginScene(); + + hr = m_dev->SetRenderTarget(0, pBackBuffer); + hr = m_dev->SetDepthStencilSurface(NULL); + + CRect r; + + GetClientRect(r); + + D3DCOLOR c = D3DCOLOR_ARGB(255, 0, 255, 0); + + CString str = s_stats; + + str += _T("\n\nF5: interlace mode\nF6: aspect ratio\nF7: OSD"); + + if(m_pD3DXFont->DrawText(NULL, str, -1, &r, DT_CALCRECT|DT_LEFT|DT_WORDBREAK, c)) + { + m_pD3DXFont->DrawText(NULL, str, -1, &r, DT_LEFT|DT_WORDBREAK, c); + } + + hr = m_dev->EndScene(); + } +*/ + m_dev.Present(); +} \ No newline at end of file diff --git a/gsdx10/GSRenderer.h b/gsdx10/GSRenderer.h new file mode 100644 index 0000000..79f1ead --- /dev/null +++ b/gsdx10/GSRenderer.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, 
or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSDevice.h" + +class GSRenderer : public CWnd, public GSState +{ + DECLARE_MESSAGE_MAP() + +protected: + int m_interlace; + int m_aspectratio; + int m_filter; + int m_options; + bool m_vsync; + bool m_osd; + int m_field; + int m_crc; + int m_frameskip; + + GSPerfMon m_perfmon; + +public: + GSRenderer(BYTE* base, bool mt, void (*irq)(), bool nloophack); + virtual ~GSRenderer(); + + virtual bool Create(LPCTSTR title); + + void Show(); + void Hide(); + + void OnClose(); + + void VSync(int field); + bool MakeSnapshot(char* path); + void SetGameCRC(int crc, int options); + void SetFrameSkip(int frameskip); + + // TODO + + GSDevice m_dev; + + struct FlipInfo + { + GSTexture2D t; + GSScale s; + }; + + virtual void Flip() = 0; + + void FinishFlip(FlipInfo src[2]); + void Merge(FlipInfo src[2], GSTexture2D& dst); + void Present(); +}; + +template class GSRendererT : public GSRenderer +{ +protected: + Vertex* m_vertices; + int m_count; + int m_maxcount; + GSVertexList m_vl; + + void Reset() + { + m_count = 0; + m_vl.RemoveAll(); + + __super::Reset(); + } + + void VertexKick(bool skip) + { + while(m_vl.GetCount() >= primVertexCount[PRIM->PRIM]) + { + if(m_count + 6 > m_maxcount) + { + m_maxcount = max(10000, m_maxcount * 3/2); + + Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 16); + + if(m_vertices) + { + memcpy(vertices, m_vertices, sizeof(Vertex) * m_count); + + 
_aligned_free(m_vertices); + } + + m_vertices = vertices; + } + + DrawingKick(skip); + } + } + + virtual void DrawingKick(bool skip) = 0; + + void ResetPrim() + { + m_vl.RemoveAll(); + } + + void FlushPrim() + { + if(m_count > 0) + { + Draw(); + + m_count = 0; + } + } + + virtual void Draw() = 0; + +public: + GSRendererT(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : GSRenderer(base, mt, irq, nloophack) + , m_vertices(NULL) + , m_maxcount(0) + { + } + + virtual ~GSRendererT() + { + if(m_vertices) + { + _aligned_free(m_vertices); + } + } +}; \ No newline at end of file diff --git a/gsdx10/GSRendererHW.cpp b/gsdx10/GSRendererHW.cpp new file mode 100644 index 0000000..77ce45e --- /dev/null +++ b/gsdx10/GSRendererHW.cpp @@ -0,0 +1,1100 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSRendererHW.h" +#include "resource.h" + +GSRendererHW::GSRendererHW(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : GSRendererT(base, mt, irq, nloophack) + , m_tc(this) + , m_width(1024) + , m_height(1024) + , m_skip(0) +{ + if(!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE)) + { + m_width = AfxGetApp()->GetProfileInt(_T("Settings"), _T("resx"), 1024); + m_height = AfxGetApp()->GetProfileInt(_T("Settings"), _T("resy"), 1024); + } +} + +bool GSRendererHW::Create(LPCTSTR title) +{ + if(!__super::Create(title)) + return false; + + if(!m_tfx.Create(&m_dev)) + return false; + + D3D10_DEPTH_STENCIL_DESC dsd; + + memset(&dsd, 0, sizeof(dsd)); + + dsd.DepthEnable = false; + dsd.StencilEnable = true; + dsd.StencilReadMask = 1; + dsd.StencilWriteMask = 1; + dsd.FrontFace.StencilFunc = D3D10_COMPARISON_ALWAYS; + dsd.FrontFace.StencilPassOp = D3D10_STENCIL_OP_REPLACE; + dsd.FrontFace.StencilFailOp = D3D10_STENCIL_OP_KEEP; + dsd.FrontFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP; + dsd.BackFace.StencilFunc = D3D10_COMPARISON_ALWAYS; + dsd.BackFace.StencilPassOp = D3D10_STENCIL_OP_REPLACE; + dsd.BackFace.StencilFailOp = D3D10_STENCIL_OP_KEEP; + dsd.BackFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP; + + m_dev->CreateDepthStencilState(&dsd, &m_date.dss); + + D3D10_BLEND_DESC bd; + + memset(&bd, 0, sizeof(bd)); + + m_dev->CreateBlendState(&bd, &m_date.bs); + + return true; +} + +void GSRendererHW::VertexKick(bool skip) +{ + GSVertexHW& v = m_vl.AddTail(); + + v.x = (float)m_v.XYZ.X; + v.y = (float)m_v.XYZ.Y; + v.z = (float)m_v.XYZ.Z; + + v.c = m_v.RGBAQ.ai32[0]; + + v.f = m_v.FOG.ai32[1]; + + if(PRIM->TME) + { + if(PRIM->FST) + { + v.w = 1.0f; + v.u = (float)(int)m_v.UV.U; + v.v = (float)(int)m_v.UV.V; + } + else + { + v.w = m_v.RGBAQ.Q; + v.u = m_v.ST.S; + v.v = m_v.ST.T; + } + } + else + { + v.w = 1.0f; + v.u = 0; + v.v = 0; + } + + __super::VertexKick(skip); +} + 
+void GSRendererHW::DrawingKick(bool skip) +{ + GSVertexHW* v = &m_vertices[m_count]; + int nv = 0; + + switch(PRIM->PRIM) + { + case GS_POINTLIST: + m_vl.RemoveAt(0, v[0]); + nv = 1; + break; + case GS_LINELIST: + m_vl.RemoveAt(0, v[0]); + m_vl.RemoveAt(0, v[1]); + nv = 2; + break; + case GS_LINESTRIP: + m_vl.RemoveAt(0, v[0]); + m_vl.GetAt(0, v[1]); + nv = 2; + break; + case GS_TRIANGLELIST: + m_vl.RemoveAt(0, v[0]); + m_vl.RemoveAt(0, v[1]); + m_vl.RemoveAt(0, v[2]); + nv = 3; + break; + case GS_TRIANGLESTRIP: + m_vl.RemoveAt(0, v[0]); + m_vl.GetAt(0, v[1]); + m_vl.GetAt(1, v[2]); + nv = 3; + break; + case GS_TRIANGLEFAN: + m_vl.GetAt(0, v[0]); + m_vl.RemoveAt(1, v[1]); + m_vl.GetAt(1, v[2]); + nv = 3; + break; + case GS_SPRITE: + m_vl.RemoveAt(0, v[0]); + m_vl.RemoveAt(0, v[1]); + nv = 2; + break; + default: + //m_vl.RemoveAll(); + ASSERT(0); + return; + } + + if(skip) + { + return; + } + + float sx0 = m_context->scissor.x0; + float sy0 = m_context->scissor.y0; + float sx1 = m_context->scissor.x1; + float sy1 = m_context->scissor.y1; + + switch(nv) + { + case 1: + if(v[0].x < sx0 + || v[0].x > sx1 + || v[0].y < sy0 + || v[0].y > sy1) + return; + break; + case 2: + if(v[0].x < sx0 && v[1].x < sx0 + || v[0].x > sx1 && v[1].x > sx1 + || v[0].y < sy0 && v[1].y < sy0 + || v[0].y > sy1 && v[1].y > sy1) + return; + break; + case 3: + if(v[0].x < sx0 && v[1].x < sx0 && v[2].x < sx0 + || v[0].x > sx1 && v[1].x > sx1 && v[2].x > sx1 + || v[0].y < sy0 && v[1].y < sy0 && v[2].y < sy0 + || v[0].y > sy1 && v[1].y > sy1 && v[2].y > sy1) + return; + break; + default: + __assume(0); + } + + m_count += nv; + + // costs a few fps, but fixes RR's shadows (or anything which paints overlapping shapes with date) +/* + if(m_context->TEST.DATE) + { + Flush(); + } +*/ +} +/* +int s_n = 0; +bool s_dump = false; +bool s_save = false; +bool s_savez = false; +*/ + +void GSRendererHW::Draw() +{ +/* +TRACE(_T("[%d] FlushPrim f %05x (%d) z %05x (%d %d %d %d) t %05x %05x (%d)\n"), + 
(int)m_perfmon.GetFrame(), + (int)m_context->FRAME.Block(), + (int)m_context->FRAME.PSM, + (int)m_context->ZBUF.Block(), + (int)m_context->ZBUF.PSM, + m_context->TEST.ZTE, + m_context->TEST.ZTST, + m_context->ZBUF.ZMSK, + PRIM->TME ? (int)m_context->TEX0.TBP0 : 0xfffff, + PRIM->TME && m_context->TEX0.PSM > PSM_PSMCT16S ? (int)m_context->TEX0.CBP : 0xfffff, + PRIM->TME ? (int)m_context->TEX0.PSM : 0xff); +*/ + // + + if(DetectBadFrame()) + { + return; + } + +/* +if(s_n >= 4653) +{ + s_save = true; +} +*/ + // + + GIFRegTEX0 TEX0; + + // rt + + TEX0.TBP0 = m_context->FRAME.Block(); + TEX0.TBW = m_context->FRAME.FBW; + TEX0.PSM = m_context->FRAME.PSM; + + GSTextureCache::GSRenderTarget* rt = m_tc.GetRenderTarget(TEX0, m_width, m_height); + + // ds + + TEX0.TBP0 = m_context->ZBUF.Block(); + TEX0.TBW = m_context->FRAME.FBW; + TEX0.PSM = m_context->ZBUF.PSM; + + GSTextureCache::GSDepthStencil* ds = m_tc.GetDepthStencil(TEX0, m_width, m_height); + + // tex + + GSTextureCache::GSTexture* tex = NULL; + + if(PRIM->TME) + { + tex = m_tc.GetTexture(); + + if(!tex) return; + } +/* +if(s_dump) +{ + CString str; + str.Format(_T("c:\\temp2\\_%05d_f%I64d_tex_%05x_%d.dds"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); + if(PRIM->TME) if(s_save) D3DX10SaveTextureToFile(tex->m_texture, D3DX10_IFF_DDS, str); + str.Format(_T("c:\\temp2\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); + if(s_save) D3DX10SaveTextureToFile(rt->m_texture, D3DX10_IFF_BMP, str); + str.Format(_T("c:\\temp2\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); + if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str); +} +*/ + // + + int prim = PRIM->PRIM; + + if(!OverrideInput(prim, tex)) + { + return; + } + + D3D10_PRIMITIVE_TOPOLOGY topology; + + switch(prim) + { + case GS_POINTLIST: + topology = D3D10_PRIMITIVE_TOPOLOGY_POINTLIST; + // 
m_perfmon.Put(GSPerfMon::Prim, m_count); + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + topology = D3D10_PRIMITIVE_TOPOLOGY_LINELIST; + // m_perfmon.Put(GSPerfMon::Prim, m_count / 2); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + topology = D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + // m_perfmon.Put(GSPerfMon::Prim, m_count / 3); + break; + default: + __assume(0); + } + + // m_perfmon.Put(GSPerfMon::Draw, 1); + + // date + + SetupDATE(rt, ds); + + // om + + GSTextureFX::OMDepthStencilSelector om_dssel; + + om_dssel.zte = m_context->TEST.ZTE; + om_dssel.ztst = m_context->TEST.ZTST; + om_dssel.zwe = !m_context->ZBUF.ZMSK; + om_dssel.date = m_context->TEST.DATE; + + GSTextureFX::OMBlendSelector om_bsel; + + om_bsel.abe = PRIM->ABE || (PRIM->PRIM == 1 || PRIM->PRIM == 2) && PRIM->AA1; + om_bsel.a = m_context->ALPHA.A; + om_bsel.b = m_context->ALPHA.B; + om_bsel.c = m_context->ALPHA.C; + om_bsel.d = m_context->ALPHA.D; + om_bsel.wr = (m_context->FRAME.FBMSK & 0x000000ff) != 0x000000ff; + om_bsel.wg = (m_context->FRAME.FBMSK & 0x0000ff00) != 0x0000ff00; + om_bsel.wb = (m_context->FRAME.FBMSK & 0x00ff0000) != 0x00ff0000; + om_bsel.wa = (m_context->FRAME.FBMSK & 0xff000000) != 0xff000000; + + float factor = (float)(int)m_context->ALPHA.FIX / 0x80; + + m_tfx.SetupOM(om_dssel, om_bsel, factor, rt->m_texture, ds->m_texture); + + // ia + + m_tfx.SetupIA(m_vertices, m_count, topology); + + // vs + + GSTextureFX::VSConstantBuffer vs_cb; + + float sx = 2.0f * rt->m_scale.x / (rt->m_texture.m_desc.Width * 16); + float sy = 2.0f * rt->m_scale.y / (rt->m_texture.m_desc.Height * 16); + float ox = (float)(int)m_context->XYOFFSET.OFX; + float oy = (float)(int)m_context->XYOFFSET.OFY; + + vs_cb.VertexScale = D3DXVECTOR4(sx, -sy, 1.0f / UINT_MAX, 0); + vs_cb.VertexOffset = D3DXVECTOR4(ox * sx + 1, -(oy * sy + 1), 0, -1); + vs_cb.TextureScale = D3DXVECTOR2(1.0f, 1.0f); + + if(PRIM->TME && PRIM->FST) + { + vs_cb.TextureScale.x = 
1.0f / (16 << m_context->TEX0.TW); + vs_cb.TextureScale.y = 1.0f / (16 << m_context->TEX0.TH); + } + + m_tfx.SetupVS(&vs_cb); + + // gs + + GSTextureFX::GSSelector gs_sel; + + gs_sel.iip = PRIM->IIP; + + switch(prim) + { + case GS_POINTLIST: + gs_sel.prim = 0; + break; + case GS_LINELIST: + case GS_LINESTRIP: + gs_sel.prim = 1; + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + gs_sel.prim = 2; + break; + case GS_SPRITE: + gs_sel.prim = 3; + break; + default: + __assume(0); + } + + m_tfx.SetupGS(gs_sel); + + // ps + + GSTextureFX::PSSelector ps_sel; + + ps_sel.fst = PRIM->FST; + ps_sel.clamp = 0; + ps_sel.bpp = 0; + ps_sel.aem = m_env.TEXA.AEM; + ps_sel.tfx = m_context->TEX0.TFX; + ps_sel.tcc = m_context->TEX0.TCC; + ps_sel.ate = m_context->TEST.ATE; + ps_sel.atst = m_context->TEST.ATST; + ps_sel.fog = PRIM->FGE; + ps_sel.clr1 = om_bsel.abe && om_bsel.a == 1 && om_bsel.b == 2 && om_bsel.d == 1; + ps_sel.fba = m_context->FBA.FBA; + ps_sel.aout = m_context->FRAME.PSM == PSM_PSMCT16 || m_context->FRAME.PSM == PSM_PSMCT16S ? 1 : 0; + + GSTextureFX::PSSamplerSelector ps_ssel; + + ps_ssel.min = m_filter == 2 ? (m_context->TEX1.MMIN & 1) : m_filter; + ps_ssel.mag = m_filter == 2 ? 
(m_context->TEX1.MMAG & 1) : m_filter; + ps_ssel.tau = 0; + ps_ssel.tav = 0; + + GSTextureFX::PSConstantBuffer ps_cb; + + ps_cb.FogColor = D3DXVECTOR4((float)(int)m_env.FOGCOL.FCR / 255, (float)(int)m_env.FOGCOL.FCG / 255, (float)(int)m_env.FOGCOL.FCB / 255, 0); + ps_cb.ClampMin = D3DXVECTOR2(-4096, -4096); + ps_cb.ClampMax = D3DXVECTOR2(+4096, +4096); + ps_cb.TA0 = (float)(int)m_env.TEXA.TA0 / 255; + ps_cb.TA1 = (float)(int)m_env.TEXA.TA1 / 255; + ps_cb.AREF = (float)(int)m_context->TEST.AREF / 255; + + if(m_context->TEST.ATST == 2 || m_context->TEST.ATST == 5) + { + ps_cb.AREF -= 0.9f/256; + } + else if(m_context->TEST.ATST == 3 || m_context->TEST.ATST == 6) + { + ps_cb.AREF += 0.9f/256; + } + + ID3D10ShaderResourceView* tex_view = NULL; + ID3D10ShaderResourceView* pal_view = NULL; + + if(tex) + { + ps_sel.bpp = tex->m_bpp2; + + switch(m_context->CLAMP.WMS) + { + case 0: case 3: ps_ssel.tau = 1; break; + case 1: case 2: ps_ssel.tau = 0; break; + default: __assume(0); + } + + switch(m_context->CLAMP.WMT) + { + case 0: case 3: ps_ssel.tav = 1; break; + case 1: case 2: ps_ssel.tav = 0; break; + default: __assume(0); + } + + if(m_context->CLAMP.WMS == 2) + { + ps_cb.ClampMin.x = (float)(int)m_context->CLAMP.MINU / (1 << m_context->TEX0.TW); + ps_cb.ClampMax.x = (float)(int)m_context->CLAMP.MAXU / (1 << m_context->TEX0.TW); + ps_sel.clamp = 1; + } + + if(m_context->CLAMP.WMT == 2) + { + ps_cb.ClampMin.y = (float)(int)m_context->CLAMP.MINV / (1 << m_context->TEX0.TH); + ps_cb.ClampMax.y = (float)(int)m_context->CLAMP.MAXV / (1 << m_context->TEX0.TH); + ps_sel.clamp = 1; + } + + float w = (float)(int)tex->m_texture.m_desc.Width; + float h = (float)(int)tex->m_texture.m_desc.Height; + + ps_cb.WH = D3DXVECTOR2(w, h); + ps_cb.rWrH = D3DXVECTOR2(1.0f / w, 1.0f / h); + ps_cb.rWZ = D3DXVECTOR2(1.0f / w, 0); + ps_cb.ZrH = D3DXVECTOR2(0, 1.0f / h); + + tex_view = tex->m_texture; + pal_view = tex->m_palette; + } + else + { + ps_sel.tfx = 4; + } + + m_tfx.SetupPS(ps_sel, &ps_cb, 
ps_ssel, tex_view, pal_view); + + // rs + + UINT w = rt->m_texture.m_desc.Width; + UINT h = rt->m_texture.m_desc.Height; + + CRect scissor( + (int)(rt->m_scale.x * (m_context->SCISSOR.SCAX0)), + (int)(rt->m_scale.y * (m_context->SCISSOR.SCAY0)), + (int)(rt->m_scale.x * (m_context->SCISSOR.SCAX1 + 1)), + (int)(rt->m_scale.y * (m_context->SCISSOR.SCAY1 + 1))); + + scissor &= CRect(0, 0, w, h); + + m_tfx.SetupRS(w, h, scissor); + + // draw + + if(!m_context->TEST.ATE || m_context->TEST.ATST != 0) + { + m_dev->Draw(m_count, 0); + } + + if(m_context->TEST.ATE && m_context->TEST.ATST != 1 && m_context->TEST.AFAIL) + { + ASSERT(!m_env.PABE.PABE); + + static const DWORD iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; + + ps_sel.atst = iatst[ps_sel.atst]; + + m_tfx.UpdatePS(ps_sel, ps_ssel); + + bool z = om_dssel.zwe; + bool r = om_bsel.wr; + bool g = om_bsel.wg; + bool b = om_bsel.wb; + bool a = om_bsel.wa; + + switch(m_context->TEST.AFAIL) + { + case 0: z = r = g = b = a = false; break; // none + case 1: z = false; break; // rgba + case 2: r = g = b = a = false; break; // z + case 3: z = a = false; break; // rgb + default: __assume(0); + } + + if(z || r || g || b || a) + { + om_dssel.zwe = z; + om_bsel.wr = r; + om_bsel.wg = g; + om_bsel.wb = b; + om_bsel.wa = a; + + m_tfx.UpdateOM(om_dssel, om_bsel, factor); + + m_dev->Draw(m_count, 0); + } + } + + m_dev.EndScene(); + +/* + if(m_env.COLCLAMP.CLAMP == 0) m_perfmon.Put(GSPerfMon::COLCLAMP); + if(m_env.PABE.PABE) m_perfmon.Put(GSPerfMon::PABE); + if(m_context->TEST.DATE) m_perfmon.Put(GSPerfMon::DATE); + if(om_bsel.abe && om_bsel.a == om_bsel.d && om_bsel.a != om_bsel.b && om_bsel.a != 1 && om_bsel.b != 2) m_perfmon.Put(GSPerfMon::ABE); +*/ +/* +if(s_dump) +{ + CString str; + str.Format(_T("c:\\temp2\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); + if(s_save) D3DX10SaveTextureToFile(rt->m_texture, D3DX10_IFF_BMP, str); + 
str.Format(_T("c:\\temp2\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); + if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str); +} +*/ +} + +void GSRendererHW::Flip() +{ + FlipInfo src[2]; + + for(int i = 0; i < countof(src); i++) + { + if(!IsEnabled(i)) + { + continue; + } + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = DISPFB[i]->Block(); + TEX0.TBW = DISPFB[i]->FBW; + TEX0.PSM = DISPFB[i]->PSM; + + if(GSTextureCache::GSRenderTarget* rt = m_tc.GetRenderTarget(TEX0, m_width, m_height, true)) + { + src[i].t = rt->m_texture; + src[i].s = rt->m_scale; +/* +if(s_dump) +{ + CString str; + str.Format(_T("c:\\temp2\\_%05d_f%I64d_fr%d_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM); + if(s_save) ::D3DX10SaveTextureToFile(rt->m_texture, D3DX10_IFF_BMP, str); +} +*/ +//s_dump = m_perfmon.GetFrame() >= 5000; +//if(m_perfmon.GetFrame() == 5000) m_tc.RemoveAll(); + } + } + + FinishFlip(src); + + m_tc.IncAge(); + + m_skip = 0; +} + +void GSRendererHW::InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) +{ + //TRACE(_T("[%d] InvalidateTexture %d,%d - %d,%d %05x\n"), (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP); + + m_tc.InvalidateTexture(BITBLTBUF, &r); +} + +void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) +{ + //TRACE(_T("[%d] InvalidateLocalMem %d,%d - %d,%d %05x\n"), (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP); + + m_tc.InvalidateLocalMem(BITBLTBUF, &r); +} + +void GSRendererHW::MinMaxUV(int w, int h, CRect& r) +{ + r.SetRect(0, 0, w, h); + + if(m_count > 100) + { + return; + } + + if(m_context->CLAMP.WMS < 3 || m_context->CLAMP.WMT < 3) + { + uvmm_t uv; + + uv.umin = uv.vmin = 0; + uv.umax = uv.vmax = 1; + + if(PRIM->FST) + { + UVMinMax(m_count, (vertex_t*)m_vertices, &uv); + + uv.umin *= 1.0f / (16 << m_context->TEX0.TW); + uv.umax *= 1.0f / (16 << m_context->TEX0.TW); + 
uv.vmin *= 1.0f / (16 << m_context->TEX0.TH); + uv.vmax *= 1.0f / (16 << m_context->TEX0.TH); + } + else + { + // FIXME + + if(m_count > 0)// && m_count < 100) + { + uv.umin = uv.vmin = +1e10; + uv.umax = uv.vmax = -1e10; + + for(int i = 0, j = m_count; i < j; i++) + { + float w = 1.0f / m_vertices[i].w; + float u = m_vertices[i].u * w; + if(uv.umax < u) uv.umax = u; + if(uv.umin > u) uv.umin = u; + float v = m_vertices[i].v * w; + if(uv.vmax < v) uv.vmax = v; + if(uv.vmin > v) uv.vmin = v; + } + } + } + + CSize bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs; + + CSize bsm(bs.cx-1, bs.cy-1); + + if(m_context->CLAMP.WMS < 3) + { + if(m_context->CLAMP.WMS == 0) + { + float fmin = floor(uv.umin); + float fmax = floor(uv.umax); + + if(fmin != fmax) {uv.umin = 0; uv.umax = 1.0f;} + else {uv.umin -= fmin; uv.umax -= fmax;} + + // FIXME: + if(uv.umin == 0 && uv.umax != 1.0f) uv.umax = 1.0f; + } + else if(m_context->CLAMP.WMS == 1) + { + if(uv.umin < 0) uv.umin = 0; + else if(uv.umin > 1.0f) uv.umin = 1.0f; + if(uv.umax < 0) uv.umax = 0; + else if(uv.umax > 1.0f) uv.umax = 1.0f; + if(uv.umin > uv.umax) uv.umin = uv.umax; + } + else if(m_context->CLAMP.WMS == 2) + { + float minu = 1.0f * m_context->CLAMP.MINU / w; + float maxu = 1.0f * m_context->CLAMP.MAXU / w; + if(uv.umin < minu) uv.umin = minu; + else if(uv.umin > maxu) uv.umin = maxu; + if(uv.umax < minu) uv.umax = minu; + else if(uv.umax > maxu) uv.umax = maxu; + if(uv.umin > uv.umax) uv.umin = uv.umax; + } + + r.left = max((int)(uv.umin * w) & ~bsm.cx, 0); + r.right = min(((int)(uv.umax * w) + bsm.cx + 1) & ~bsm.cx, w); + } + + if(m_context->CLAMP.WMT < 3) + { + if(m_context->CLAMP.WMT == 0) + { + float fmin = floor(uv.vmin); + float fmax = floor(uv.vmax); + + if(fmin != fmax) {uv.vmin = 0; uv.vmax = 1.0f;} + else {uv.vmin -= fmin; uv.vmax -= fmax;} + + // FIXME: + if(uv.vmin == 0 && uv.vmax != 1.0f) uv.vmax = 1.0f; + } + else if(m_context->CLAMP.WMT == 1) + { + if(uv.vmin < 0) uv.vmin = 0; + else if(uv.vmin > 
1.0f) uv.vmin = 1.0f; + if(uv.vmax < 0) uv.vmax = 0; + else if(uv.vmax > 1.0f) uv.vmax = 1.0f; + if(uv.vmin > uv.vmax) uv.vmin = uv.vmax; + } + else if(m_context->CLAMP.WMT == 2) + { + float minv = 1.0f * m_context->CLAMP.MINV / h; + float maxv = 1.0f * m_context->CLAMP.MAXV / h; + if(uv.vmin < minv) uv.vmin = minv; + else if(uv.vmin > maxv) uv.vmin = maxv; + if(uv.vmax < minv) uv.vmax = minv; + else if(uv.vmax > maxv) uv.vmax = maxv; + if(uv.vmin > uv.vmax) uv.vmin = uv.vmax; + } + + r.top = max((int)(uv.vmin * h) & ~bsm.cy, 0); + r.bottom = min(((int)(uv.vmax * h) + bsm.cy + 1) & ~bsm.cy, h); + } + } + //ASSERT(r.left <= r.right); + //ASSERT(r.top <= r.bottom); +} + +void GSRendererHW::SetupDATE(GSTextureCache::GSRenderTarget* rt, GSTextureCache::GSDepthStencil* ds) +{ + if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL)&0x80000000) + + // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows + + float xmin = -1, xmax = +1; + float ymin = -1, ymax = +1; + + float umin = 0, umax = 1; + float vmin = 0, vmax = 1; + + // if(m_count < 1000) { + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + + __m128 xymin = _mm_set1_ps(+1e10); + __m128 xymax = _mm_set1_ps(-1e10); + + for(int i = 0, j = m_count; i < j; i++) + { + xymin = _mm_min_ps(m_vertices[i].m128[0], xymin); + xymax = _mm_max_ps(m_vertices[i].m128[0], xymax); + } + + xmin = xymin.m128_f32[0]; + ymin = xymin.m128_f32[1]; + xmax = xymax.m128_f32[0]; + ymax = xymax.m128_f32[1]; + +#else + + xmin = ymin = +1e10; + xmax = ymax = -1e10; + + for(int i = 0, j = m_count; i < j; i++) + { + float x = m_vertices[i].x; + + if(x < xmin) xmin = x; + if(x > xmax) xmax = x; + + float y = m_vertices[i].y; + + if(y < ymin) ymin = y; + if(y > ymax) ymax = y; + } + +#endif + + float sx = 2.0f * rt->m_scale.x / (rt->m_texture.m_desc.Width * 16); + float sy = 2.0f * rt->m_scale.y / (rt->m_texture.m_desc.Height * 16); + float ox = (float)(int)m_context->XYOFFSET.OFX; + float oy = 
(float)(int)m_context->XYOFFSET.OFY; + + xmin = xmin * sx - (ox * sx + 1); + xmax = xmax * sx - (ox * sx + 1); + ymin = ymin * sy - (oy * sy + 1); + ymax = ymax * sy - (oy * sy + 1); + + if(xmin < -1) xmin = -1; + if(xmax > +1) xmax = +1; + if(ymin < -1) ymin = -1; + if(ymax > +1) ymax = +1; + + umin = (xmin + 1) / 2; + umax = (xmax + 1) / 2; + vmin = (ymin + 1) / 2; + vmax = (ymax + 1) / 2; + + // } + + // om + + GSTexture2D tmp; + + m_dev.CreateRenderTarget(tmp, rt->m_texture.m_desc.Width, rt->m_texture.m_desc.Height); + + m_dev->ClearDepthStencilView(ds->m_texture, D3D10_CLEAR_STENCIL, 0, 0); + + m_dev.OMSetRenderTargets(tmp, ds->m_texture); + + m_dev.OMSet(m_date.dss, 1, m_date.bs, 0); + + // ia + + VertexPT1 vertices[] = + { + {xmin, -ymin, 0.5f, 1.0f, umin, vmin}, + {xmax, -ymin, 0.5f, 1.0f, umax, vmin}, + {xmin, -ymax, 0.5f, 1.0f, umin, vmax}, + {xmax, -ymax, 0.5f, 1.0f, umax, vmax}, + }; + + m_dev.IASet(m_dev.m_convert.vb, 4, vertices, m_dev.m_convert.il, D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + // vs + + m_dev.VSSet(m_dev.m_convert.vs, NULL); + + // gs + + m_dev.GSSet(NULL); + + // ps + + m_dev.PSSetShaderResources(rt->m_texture, NULL); + + m_dev.PSSet(m_dev.m_convert.ps[m_context->TEST.DATM ? 
2 : 3], m_dev.m_ss_point); + + // rs + + m_dev.RSSet(tmp.m_desc.Width, tmp.m_desc.Height); + + // set + + m_dev->Draw(4, 0); + + // + + m_dev.EndScene(); + + m_dev.Recycle(tmp); +} + +bool GSRendererHW::DetectBadFrame() +{ + DWORD FBP = m_context->FRAME.Block(); + DWORD FPSM = m_context->FRAME.PSM; + + bool TME = PRIM->TME; + DWORD TBP0 = m_context->TEX0.TBP0; + DWORD TPSM = m_context->TEX0.PSM; + + switch(m_crc) + { + case 0x21068223: // okami ntsc/us + case 0x891f223f: // okami pal/fr + + if(m_skip == 0) + { + if(TME && FBP == 0x00e00 && FPSM == PSM_PSMCT32 && TBP0 == 0x00000 && TPSM == PSM_PSMCT32) + { + m_skip = 1000; + } + } + else + { + if(TME && FBP == 0x00e00 && FPSM == PSM_PSMCT32 && TBP0 == 0x03800 && TPSM == PSM_PSMT4) + { + m_skip = 0; + } + } + + break; + + case 0x053D2239: // mgs3s1 ntsc/us + // TODO: case 0x086273D2: mgs3 snake eater pal/fr + + if(m_skip == 0) + { + if(TME && FBP == 0x02000 && FPSM == PSM_PSMCT32 && (TBP0 == 0x00000 || TBP0 == 0x01000) && TPSM == PSM_PSMCT24) + { + m_skip = 1000; // 76, 79 + } + else if(TME && FBP == 0x02800 && FPSM == PSM_PSMCT24 && (TBP0 == 0x00000 || TBP0 == 0x01000) && TPSM == PSM_PSMCT32) + { + m_skip = 1000; // 69 + } + } + else + { + if(!TME && (FBP == 0x00000 || FBP == 0x01000) && FPSM == PSM_PSMCT32) + { + m_skip = 0; + } + } + + break; + + case 0x278722BF: // dbz bt2 ntsc/us + + if(m_skip == 0) + { + if(TME && /*FBP == 0x00000 && FPSM == PSM_PSMCT16 &&*/ TBP0 == 0x02000 && TPSM == PSM_PSMZ16) + { + m_skip = 27; + } + } + + break; + + case 0x72B3802A: // sfex3 ntsc/us + + if(m_skip == 0) + { + if(TME && FBP == 0x00f00 && FPSM == PSM_PSMCT16 && (TBP0 == 0x00500 || TBP0 == 0x00000) && TPSM == PSM_PSMCT32) + { + m_skip = 4; + } + } + + break; + + case 0x28703748: // bully ntsc/us + + if(m_skip == 0) + { + if(TME && (FBP == 0x00000 || FBP == 0x01180) && (TBP0 == 0x00000 || TBP0 == 0x01180) && FBP == TBP0 && FPSM == PSM_PSMCT32 && FPSM == TPSM) + { + return true; // allowed for bully + } + + if(TME && (FBP == 
0x00000 || FBP == 0x01180) && FPSM == PSM_PSMCT16S && TBP0 == 0x02300 && TPSM == PSM_PSMZ16S) + { + m_skip = 6; + } + } + else + { + if(!TME && (FBP == 0x00000 || FBP == 0x01180) && FPSM == PSM_PSMCT32) + { + m_skip = 0; + } + } + + break; + + case 0xC19A374E: // shadow of the colossus ntsc/us + + if(m_skip == 0) + { + if(TME && FBP == 0x02b80 && FPSM == PSM_PSMCT24 && TBP0 == 0x01e80 && TPSM == PSM_PSMCT24) + { + m_skip = 9; + } + else if(TME && FBP == 0x01e80 && FPSM == PSM_PSMCT32 && TBP0 == 0x03880 && TPSM == PSM_PSMCT32) + { + m_skip = 8; + } + } + + break; + } + + if(m_skip == 0) + { + if(TME) + { + if(HasSharedBits(FBP, FPSM, TBP0, TPSM)) + { + m_skip = 1; + } + + // depth textures (bully, mgs3s1 intro) + + if(TPSM == PSM_PSMZ32 || TPSM == PSM_PSMZ24 || TPSM == PSM_PSMZ16 || TPSM == PSM_PSMZ16S) + { + // m_perfmon.Put(GSPerfMon::DepthTexture); + m_skip = 1; + } + } + } + + if(m_skip > 0) + { + m_skip--; + + return true; + } + + return false; +} + +bool GSRendererHW::OverrideInput(int& prim, GSTextureCache::GSTexture* tex) +{ + #pragma region ffxii pal video conversion + + if(m_crc == 0x78da0252 || m_crc == 0xc1274668 || m_crc == 0xdc2a467e || m_crc == 0xca284668) + { + static DWORD* video = NULL; + static bool ok = false; + + if(prim == GS_POINTLIST && m_count >= 448*448 && m_count <= 448*512) + { + // incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454 + + if(!video) video = new DWORD[512*512]; + + for(int x = 0, i = 0, rows = m_count / 448; x < 448; x += 16) + { + DWORD* dst = &video[x]; + + for(int y = 0; y < rows; y++, dst += 512) + { + for(int j = 0; j < 16; j++, i++) + { + dst[j] = m_vertices[i].c; + } + } + } + + ok = true; + + return false; + } + else if(prim == GS_LINELIST && m_count == 512*2 && ok) + { + // normally, this step would copy the video onto screen with 512 texture mapped horizontal lines, + // but we use the stored video data to create a new texture, and replace the lines with two triangles + + ok = 
false; + + m_dev.Recycle(tex->m_texture); + m_dev.Recycle(tex->m_palette); + + m_dev.CreateTexture(tex->m_texture, 512, 512); + + D3D10_BOX box = {0, 0, 0, 448, 512, 1}; + + m_dev->UpdateSubresource(tex->m_texture, 0, &box, video, 512*4, 0); + + m_vertices[0] = m_vertices[0]; + m_vertices[1] = m_vertices[m_count - 1]; + + prim = GS_SPRITE; + m_count = 2; + + return true; + } + } + + #pragma endregion + + return true; +} + diff --git a/gsdx10/GSRendererHW.h b/gsdx10/GSRendererHW.h new file mode 100644 index 0000000..683c817 --- /dev/null +++ b/gsdx10/GSRendererHW.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSRenderer.h" +#include "GSTextureCache.h" +#include "GSTextureFX.h" +#include "GSVertexHW.h" + +class GSRendererHW : public GSRendererT +{ + friend class GSTextureCache; + +protected: + int m_width; + int m_height; + int m_skip; + + GSTextureCache m_tc; + GSTextureFX m_tfx; + + void VertexKick(bool skip); + void DrawingKick(bool skip); + void Draw(); + void Flip(); + void InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, CRect r); + void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r); + void MinMaxUV(int w, int h, CRect& r); + + struct + { + CComPtr dss; + CComPtr bs; + } m_date; + + void SetupDATE(GSTextureCache::GSRenderTarget* rt, GSTextureCache::GSDepthStencil* ds); + bool OverrideInput(int& prim, GSTextureCache::GSTexture* tex); + bool DetectBadFrame(); + +public: + GSRendererHW(BYTE* base, bool mt, void (*irq)(), bool nloophack); + + bool Create(LPCTSTR title); +}; \ No newline at end of file diff --git a/gsdx10/GSRendererNull.cpp b/gsdx10/GSRendererNull.cpp new file mode 100644 index 0000000..c6d105c --- /dev/null +++ b/gsdx10/GSRendererNull.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GSRendererNull.h" + +GSRendererNull::GSRendererNull(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : GSRendererT(base, mt, irq, nloophack) +{ +} + +GSRendererNull::~GSRendererNull() +{ +} + +void GSRendererNull::VertexKick(bool skip) +{ + m_vl.AddTail(); + + __super::VertexKick(skip); +} + +void GSRendererNull::DrawingKick(bool skip) +{ + VertexNull v; + + switch(PRIM->PRIM) + { + case GS_POINTLIST: + m_vl.RemoveAt(0, v); + break; + case GS_LINELIST: + m_vl.RemoveAt(0, v); + m_vl.RemoveAt(0, v); + break; + case GS_LINESTRIP: + m_vl.RemoveAt(0, v); + m_vl.GetAt(0, v); + break; + case GS_TRIANGLELIST: + m_vl.RemoveAt(0, v); + m_vl.RemoveAt(0, v); + m_vl.RemoveAt(0, v); + break; + case GS_TRIANGLESTRIP: + m_vl.RemoveAt(0, v); + m_vl.GetAt(0, v); + m_vl.GetAt(1, v); + break; + case GS_TRIANGLEFAN: + m_vl.GetAt(0, v); + m_vl.RemoveAt(1, v); + m_vl.GetAt(1, v); + break; + case GS_SPRITE: + m_vl.RemoveAt(0, v); + m_vl.RemoveAt(0, v); + break; + default: + ASSERT(0); + m_vl.RemoveAll(); + return; + } + + if(!skip) + { + //m_perfmon.Put(GSPerfMon::Prim, 1); + } +} + +void GSRendererNull::Flip() +{ + FlipInfo rt[2]; + FinishFlip(rt); +} diff --git a/gsdx10/GSRendererNull.h b/gsdx10/GSRendererNull.h new file mode 100644 index 0000000..09c2e82 --- /dev/null +++ b/gsdx10/GSRendererNull.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSRenderer.h" + +struct VertexNull {}; + +class GSRendererNull : public GSRendererT +{ +protected: + void VertexKick(bool skip); + void DrawingKick(bool skip); + void Draw() {} + void Flip(); + +public: + GSRendererNull(BYTE* base, bool mt, void (*irq)(), bool nloophack); + virtual ~GSRendererNull(); +}; \ No newline at end of file diff --git a/gsdx10/GSRendererSW.cpp b/gsdx10/GSRendererSW.cpp new file mode 100644 index 0000000..c7474db --- /dev/null +++ b/gsdx10/GSRendererSW.cpp @@ -0,0 +1,1039 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GSRendererSW.h" + +#pragma warning(push) +#pragma warning(disable: 4701) + +template +GSRendererSW::GSRendererSW(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : GSRendererT(base, mt, irq, nloophack) +{ + int i = SHRT_MIN; + int j = 0; + for(; i < 0; i++, j++) {m_clip[j] = 0; m_mask[j] = (BYTE)i;} + for(; i < 256; i++, j++) {m_clip[j] = (BYTE)i; m_mask[j] = (BYTE)i;} + for(; i <= SHRT_MAX; i++, j++) {m_clip[j] = 255; m_mask[j] = (BYTE)i;} + + m_uv = (uv_wrap_t*)_aligned_malloc(sizeof(uv_wrap_t), 16); + + // w00t :P + + #define InitATST(iZTST, iATST) \ + m_dv[iZTST][iATST] = &GSRendererSW::DrawVertex; \ + + #define InitZTST(iZTST) \ + InitATST(iZTST, 0) \ + InitATST(iZTST, 1) \ + InitATST(iZTST, 2) \ + InitATST(iZTST, 3) \ + InitATST(iZTST, 4) \ + InitATST(iZTST, 5) \ + InitATST(iZTST, 6) \ + InitATST(iZTST, 7) \ + + #define InitDV() \ + InitZTST(0) \ + InitZTST(1) \ + InitZTST(2) \ + InitZTST(3) \ + + InitDV(); + + #define InitTFX(iLOD, bLCM, bTCC, iTFX) \ + m_dvtfx[iLOD][bLCM][bTCC][iTFX] = &GSRendererSW::DrawVertexTFX; \ + + #define InitTCC(iLOD, bLCM, bTCC) \ + InitTFX(iLOD, bLCM, bTCC, 0) \ + InitTFX(iLOD, bLCM, bTCC, 1) \ + InitTFX(iLOD, bLCM, bTCC, 2) \ + InitTFX(iLOD, bLCM, bTCC, 3) \ + + #define InitLCM(iLOD, bLCM) \ + InitTCC(iLOD, bLCM, false) \ + InitTCC(iLOD, bLCM, true) \ + + #define InitLOD(iLOD) \ + InitLCM(iLOD, false) \ + InitLCM(iLOD, true) \ + + #define InitDVTFX() \ + InitLOD(0) \ + InitLOD(1) \ + InitLOD(2) \ + InitLOD(3) \ + + + InitDVTFX(); +} + +template +GSRendererSW::~GSRendererSW() +{ + _aligned_free(m_uv); +} + +template +void GSRendererSW::DrawingKick(bool skip) +{ + Vertex* v = &m_vertices[m_count]; + int nv = 0; + + switch(PRIM->PRIM) + { + case GS_POINTLIST: + m_vl.RemoveAt(0, v[0]); + nv = 1; + break; + case GS_LINELIST: + m_vl.RemoveAt(0, v[0]); + m_vl.RemoveAt(0, v[1]); + nv = 2; + break; + case GS_LINESTRIP: + m_vl.RemoveAt(0, v[0]); + 
m_vl.GetAt(0, v[1]); + nv = 2; + break; + case GS_TRIANGLELIST: + m_vl.RemoveAt(0, v[0]); + m_vl.RemoveAt(0, v[1]); + m_vl.RemoveAt(0, v[2]); + nv = 3; + break; + case GS_TRIANGLESTRIP: + m_vl.RemoveAt(0, v[0]); + m_vl.GetAt(0, v[1]); + m_vl.GetAt(1, v[2]); + nv = 3; + break; + case GS_TRIANGLEFAN: + m_vl.GetAt(0, v[0]); + m_vl.RemoveAt(1, v[1]); + m_vl.GetAt(1, v[2]); + nv = 3; + break; + case GS_SPRITE: + m_vl.RemoveAt(0, v[0]); + m_vl.RemoveAt(0, v[1]); + nv = 4; + v[0].p.z = v[1].p.z; + v[0].p.q = v[1].p.q; + v[0].t.z = v[1].t.z; + v[2] = v[1]; + v[3] = v[1]; + v[1].p.y = v[0].p.y; + v[1].t.y = v[0].t.y; + v[2].p.x = v[0].p.x; + v[2].t.x = v[0].t.x; + break; + default: + ASSERT(0); + return; + } + + if(skip) + { + return; + } + + Vertex::Scalar sx0((int)m_context->SCISSOR.SCAX0); + Vertex::Scalar sy0((int)m_context->SCISSOR.SCAY0); + Vertex::Scalar sx1((int)m_context->SCISSOR.SCAX1); + Vertex::Scalar sy1((int)m_context->SCISSOR.SCAY1); + + switch(nv) + { + case 1: + if(v[0].p.x < sx0 + || v[0].p.x > sx1 + || v[0].p.y < sy0 + || v[0].p.y > sy1) + return; + break; + case 2: + if(v[0].p.x < sx0 && v[1].p.x < sx0 + || v[0].p.x > sx1 && v[1].p.x > sx1 + || v[0].p.y < sy0 && v[1].p.y < sy0 + || v[0].p.y > sy1 && v[1].p.y > sy1) + return; + break; + case 3: + if(v[0].p.x < sx0 && v[1].p.x < sx0 && v[2].p.x < sx0 + || v[0].p.x > sx1 && v[1].p.x > sx1 && v[2].p.x > sx1 + || v[0].p.y < sy0 && v[1].p.y < sy0 && v[2].p.y < sy0 + || v[0].p.y > sy1 && v[1].p.y > sy1 && v[2].p.y > sy1) + return; + break; + case 4: + if(v[0].p.x < sx0 && v[3].p.x < sx0 + || v[0].p.x > sx1 && v[3].p.x > sx1 + || v[0].p.y < sy0 && v[3].p.y < sy0 + || v[0].p.y > sy1 && v[3].p.y > sy1) + return; + break; + default: + __assume(0); + } + + if(PRIM->IIP == 0 || PRIM->PRIM == GS_SPRITE) + { + Vertex::Vector c = v[nv - 1].c; + + for(int i = 0; i < nv - 1; i++) + { + v[i].c = c; + } + } + + m_count += nv; +} +/* +extern int s_n; +extern bool s_dump; +extern bool s_save; +*/ + +static int bZTE; // , 
iZTST, iATST, iLOD, bLCM, bTCC, iTFX; + +template +void GSRendererSW::Draw() +{ +/* +if(s_dump) +{ + CString str; + str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); + if(PRIM->TME) if(s_save) {m_mem.SetupCLUT32(m_context->TEX0, m_env.TEXA); m_mem.SaveBMP(m_dev, str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);} + + str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); + if(s_save) {m_mem.SaveBMP(m_dev, str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, m_regs.GetFrameSize(1).cx, 512);}//m_regs.GetFrameSize(1).cy); +} +*/ + bZTE = m_context->TEST.ZTE && m_context->TEST.ZTST >= 2 || !m_context->ZBUF.ZMSK; + + int iZTST = !m_context->TEST.ZTE ? 1 : m_context->TEST.ZTST; + int iATST = !m_context->TEST.ATE ? 1 : m_context->TEST.ATST; + + m_pDrawVertex = m_dv[iZTST][iATST]; + + if(PRIM->TME) + { + int iLOD = (m_context->TEX1.MMAG & 1) + (m_context->TEX1.MMIN & 1); + int bLCM = m_context->TEX1.LCM ? 1 : 0; + int bTCC = m_context->TEX0.TCC ? 1 : 0; + int iTFX = m_context->TEX0.TFX; + + if(PRIM->FST) + { + iLOD = 3; + bLCM = m_context->TEX1.K <= 0 && (m_context->TEX1.MMAG & 1) || m_context->TEX1.K > 0 && (m_context->TEX1.MMIN & 1); + } + + if(m_filter != D3D10_FILTER_TYPE_LINEAR) + { + if(iLOD == 3) bLCM = 0; + else iLOD = 0; + } + + m_pDrawVertexTFX = m_dvtfx[iLOD][bLCM][bTCC][iTFX]; + + SetupTexture(); + } + + m_scissor.SetRect( + max(m_context->SCISSOR.SCAX0, 0), + max(m_context->SCISSOR.SCAY0, 0), + min(m_context->SCISSOR.SCAX1 + 1, m_context->FRAME.FBW * 64), + min(m_context->SCISSOR.SCAY1 + 1, 4096)); + + m_clamp = (m_env.COLCLAMP.CLAMP ? 
m_clip : m_mask) + 32768; + + int prims = 0; + + Vertex* vertices = m_vertices; + + switch(PRIM->PRIM) + { + case GS_POINTLIST: + prims = m_count; + for(int i = 0; i < prims; i++, vertices++) DrawPoint(vertices); + break; + case GS_LINELIST: + case GS_LINESTRIP: + ASSERT(!(m_count & 1)); + prims = m_count / 2; + for(int i = 0; i < prims; i++, vertices += 2) DrawLine(vertices); + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + ASSERT(!(m_count % 3)); + prims = m_count / 3; + for(int i = 0; i < prims; i++, vertices += 3) DrawTriangle(vertices); + break; + case GS_SPRITE: + ASSERT(!(m_count & 3)); + prims = m_count / 4; + for(int i = 0; i < prims; i++, vertices += 4) DrawSprite(vertices); + break; + default: + __assume(0); + } +/* + m_perfmon.Put(GSPerfMon::Prim, nPrims); + m_perfmon.Put(GSPerfMon::Draw, 1); + +if(s_dump) +{ + CString str; + str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); + if(s_save) {m_mem.SaveBMP(m_dev, str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, m_regs.GetFrameSize(1).cx, 512);}//m_regs.GetFrameSize(1).cy); +} +*/ +} + +template +void GSRendererSW::Flip() +{ + HRESULT hr; + + FlipInfo src[2]; + + for(int i = 0; i < countof(src); i++) + { + if(!IsEnabled(i)) + { + continue; + } + + int w = DISPFB[i]->FBW * 64; + int h = GetFrameRect(i).bottom; // TODO: round up + + //GSLocalMemory::RoundUp(, GSLocalMemory::GetBlockSize(DISPFB[i].PSM)); + + if(m_texture[i].m_desc.Width != (UINT)w || m_texture[i].m_desc.Height != (UINT)h) + { + m_texture[i] = GSTexture2D(); + } + + if(!m_texture[i]) + { + hr = m_dev.CreateTexture(m_texture[i], w, h); + + if(FAILED(hr)) continue; + } + + GIFRegTEX0 TEX0; + + TEX0.TBP0 = DISPFB[i]->Block(); + TEX0.TBW = DISPFB[i]->FBW; + TEX0.PSM = DISPFB[i]->PSM; + + GIFRegCLAMP CLAMP; + + CLAMP.WMS = CLAMP.WMT = 1; + + static BYTE* buff = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); + 
static int pitch = 1024 * 4; + + m_mem.ReadTexture(CRect(0, 0, w, h), buff, pitch, TEX0, m_env.TEXA, CLAMP); + + D3D10_BOX box = {0, 0, 0, w, h, 1}; + + m_dev->UpdateSubresource(m_texture[i], 0, &box, buff, pitch, 0); + + src[i].t = m_texture[i]; + src[i].s = GSScale(1, 1); +/* +if(s_dump) +{ + CString str; + str.Format(_T("c:\\temp1\\_%05d_f%I64d_fr%d_%05x.bmp"), s_n++, m_perfmon.GetFrame(), i, (int)TEX0.TBP0); + if(s_save) ::D3DX10SaveTextureToFile(m_texture[i], D3DX10_IFF_BMP, str); +} +*/ +// s_dump = m_perfmon.GetFrame() >= 5000; + + } + + FinishFlip(src); +} + +template +void GSRendererSW::RowInit(int x, int y) +{ + m_faddr_x0 = (m_context->ftbl->pa)(0, y, m_context->FRAME.Block(), m_context->FRAME.FBW); + m_faddr_ro = &m_context->ftbl->rowOffset[y&7][x]; + + if(bZTE) + { + m_zaddr_x0 = (m_context->ztbl->pa)(0, y, m_context->ZBUF.Block(), m_context->FRAME.FBW); + m_zaddr_ro = &m_context->ztbl->rowOffset[y&7][x]; + } + + m_fx = x-1; // -1 because RowStep() will do +1, yea lame... + m_fy = y; + + RowStep(); +} + +template +void GSRendererSW::RowStep() +{ + m_fx++; + + m_faddr = m_faddr_x0 + *m_faddr_ro++; + + if(bZTE) + { + m_zaddr = m_zaddr_x0 + *m_zaddr_ro++; + } +} + +template +void GSRendererSW::DrawPoint(Vertex* v) +{ + CPoint p = *v; + + if(m_scissor.PtInRect(p)) + { + RowInit(p.x, p.y); + + (this->*m_pDrawVertex)(*v); + } +} + +template +void GSRendererSW::DrawLine(Vertex* v) +{ + Vertex dv = v[1] - v[0]; + + Vertex::Vector dp = dv.p; + + dp.x.abs(); + dp.y.abs(); + + int dx = (int)dp.x; + int dy = (int)dp.y; + + if(dx == 0 && dy == 0) return; + + int i = dx > dy ? 
0 : 1; + + Vertex edge = v[0]; + Vertex dedge = dv / dp.v[i]; + + // TODO: clip with the scissor + + int steps = (int)dp.v[i]; + + while(steps-- > 0) + { + CPoint p = edge; + + if(m_scissor.PtInRect(p)) + { + RowInit(p.x, p.y); + + (this->*m_pDrawVertex)(edge); + } + + edge += dedge; + } +} + +template +void GSRendererSW::DrawTriangle(Vertex* v) +{ + if(v[1].p.y < v[0].p.y) {Vertex::Exchange(&v[0], &v[1]);} + if(v[2].p.y < v[0].p.y) {Vertex::Exchange(&v[0], &v[2]);} + if(v[2].p.y < v[1].p.y) {Vertex::Exchange(&v[1], &v[2]);} + + if(!(v[0].p.y < v[2].p.y)) return; + + Vertex v01 = v[1] - v[0]; + Vertex v02 = v[2] - v[0]; + + Vertex::Scalar temp = v01.p.y / v02.p.y; + Vertex::Scalar longest = temp * v02.p.x - v01.p.x; + + int ledge, redge; + if(Vertex::Scalar(0) < longest) {ledge = 0; redge = 1; if(longest < Vertex::Scalar(1)) longest = Vertex::Scalar(1);} + else if(longest < Vertex::Scalar(0)) {ledge = 1; redge = 0; if(Vertex::Scalar(-1) < longest) longest = Vertex::Scalar(-1);} + else return; + + Vertex edge[2] = {v[0], v[0]}; + + Vertex dedge[2]; + dedge[0].p.y = dedge[1].p.y = Vertex::Scalar(1); + if(Vertex::Scalar(0) < v01.p.y) dedge[ledge] = v01 / v01.p.y; + if(Vertex::Scalar(0) < v02.p.y) dedge[redge] = v02 / v02.p.y; + + Vertex scan; + + Vertex dscan = (v02 * temp - v01) / longest; + dscan.p.y = 0; + + for(int i = 0; i < 2; i++, v++) + { + int top = edge[0].p.y.ceil_i(); + int bottom = v[1].p.y.ceil_i(); + + if(top < m_scissor.top) top = min(m_scissor.top, bottom); + if(bottom > m_scissor.bottom) bottom = m_scissor.bottom; + + if(edge[0].p.y < Vertex::Scalar(top)) // for(int j = 0; j < 2; j++) edge[j] += dedge[j] * ((float)top - edge[0].p.y); + { + Vertex::Scalar dy = Vertex::Scalar(top) - edge[0].p.y; + edge[0] += dedge[0] * dy; + edge[1].p.x += dedge[1].p.x * dy; + edge[0].p.y = edge[1].p.y = Vertex::Scalar(top); + } + + ASSERT(top >= bottom || (int)((edge[1].p.y - edge[0].p.y) * 10) == 0); + + for(; top < bottom; top++) + { + int left = 
edge[0].p.x.ceil_i(); + int right = edge[1].p.x.ceil_i(); + + if(left < m_scissor.left) left = m_scissor.left; + if(right > m_scissor.right) right = m_scissor.right; + + if(right > left) + { + scan = edge[0]; + + if(edge[0].p.x < Vertex::Scalar(left)) + { + scan += dscan * (Vertex::Scalar(left) - edge[0].p.x); + scan.p.x = Vertex::Scalar(left); + } + + RowInit(left, top); + + for(int steps = right - left; steps > 0; steps--) + { + (this->*m_pDrawVertex)(scan); + scan += dscan; + RowStep(); + } + } + + // for(int j = 0; j < 2; j++) edge[j] += dedge[j]; + edge[0] += dedge[0]; + edge[1].p += dedge[1].p; + } + + if(v[1].p.y < v[2].p.y) + { + edge[ledge] = v[1]; + dedge[ledge] = (v[2] - v[1]) / (v[2].p.y - v[1].p.y); + edge[ledge] += dedge[ledge] * (edge[ledge].p.y.ceil_s() - edge[ledge].p.y); + } + } +} + +template +void GSRendererSW::DrawSprite(Vertex* v) +{ + if(v[2].p.y < v[0].p.y) {Vertex::Exchange(&v[0], &v[2]); Vertex::Exchange(&v[1], &v[3]);} + if(v[1].p.x < v[0].p.x) {Vertex::Exchange(&v[0], &v[1]); Vertex::Exchange(&v[2], &v[3]);} + + if(v[0].p.x == v[1].p.x || v[0].p.y == v[2].p.y) return; + + Vertex v01 = v[1] - v[0]; + Vertex v02 = v[2] - v[0]; + + Vertex edge = v[0]; + Vertex dedge = v02 / v02.p.y; + Vertex scan; + Vertex dscan = v01 / v01.p.x; + + int top = v[0].p.y.ceil_i(); + int bottom = v[2].p.y.ceil_i(); + + if(top < m_scissor.top) top = min(m_scissor.top, bottom); + if(bottom > m_scissor.bottom) bottom = m_scissor.bottom; + + if(v[0].p.y < Vertex::Scalar(top)) edge += dedge * (Vertex::Scalar(top) - v[0].p.y); + + int left = v[0].p.x.ceil_i(); + int right = v[1].p.x.ceil_i(); + + if(left < m_scissor.left) left = m_scissor.left; + if(right > m_scissor.right) right = m_scissor.right; + + if(left >= right || top >= bottom) return; + + if(v[0].p.x < Vertex::Scalar(left)) edge += dscan * (Vertex::Scalar(left) - v[0].p.x); + + if(DrawFilledRect(left, top, right, bottom, edge)) + return; + + for(; top < bottom; top++) + { + scan = edge; + + RowInit(left, 
top); + + for(int steps = right - left; steps > 0; steps--) + { + (this->*m_pDrawVertex)(scan); + scan += dscan; + RowStep(); + } + + edge += dedge; + } +} + +template +bool GSRendererSW::DrawFilledRect(int left, int top, int right, int bottom, const Vertex& v) +{ + if(left >= right || top >= bottom) + return false; + + ASSERT(top >= 0); + ASSERT(bottom >= 0); + + if(PRIM->IIP && PRIM->PRIM != GS_SPRITE + || m_context->TEST.ZTE && m_context->TEST.ZTST != 1 + || m_context->TEST.ATE && m_context->TEST.ATST != 1 + || m_context->TEST.DATE + || PRIM->TME + || PRIM->ABE + || PRIM->FGE + || m_env.DTHE.DTHE + || m_context->FRAME.FBMSK) + return false; + + DWORD FBP = m_context->FRAME.Block(), FBW = m_context->FRAME.FBW; + DWORD ZBP = m_context->ZBUF.Block(); + + if(!m_context->ZBUF.ZMSK) + { + m_mem.FillRect(CRect(left, top, right, bottom), v.GetZ(), m_context->ZBUF.PSM, ZBP, FBW); + } + + __declspec(align(16)) union {struct {short Rf, Gf, Bf, Af;}; UINT64 Cui64;}; + Cui64 = v.c; + + Rf = m_clamp[Rf]; + Gf = m_clamp[Gf]; + Bf = m_clamp[Bf]; + Af |= m_context->FBA.FBA << 7; + + DWORD Cdw; + + if(m_context->FRAME.PSM == PSM_PSMCT16 || m_context->FRAME.PSM == PSM_PSMCT16S) + { + Cdw = ((DWORD)(Rf&0xf8) >> 3) + | ((DWORD)(Gf&0xf8) << 2) + | ((DWORD)(Bf&0xf8) << 7) + | ((DWORD)(Af&0x80) << 8); + } + else + { +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + __m128i r0 = _mm_load_si128((__m128i*)&Cui64); + Cdw = (DWORD)_mm_cvtsi128_si32(_mm_packus_epi16(r0, r0)); +#else + Cdw = ((DWORD)(Rf&0xff) << 0) + | ((DWORD)(Gf&0xff) << 8) + | ((DWORD)(Bf&0xff) << 16) + | ((DWORD)(Af&0xff) << 24); +#endif + } + + m_mem.FillRect(CRect(left, top, right, bottom), Cdw, m_context->FRAME.PSM, FBP, FBW); + + return true; +} + +template +template +void GSRendererSW::DrawVertex(const Vertex& v) +{ + DWORD vz; + + switch(iZTST) + { + case 0: return; + case 1: break; + case 2: + vz = v.GetZ(); + if(vz < m_mem.readPixelX(m_context->ZBUF.PSM, m_zaddr)) return; + // if(vz < 
(m_mem.*m_context->ztbl->rpa)(m_zaddr)) return; + break; + case 3: + vz = v.GetZ(); + if(vz <= m_mem.readPixelX(m_context->ZBUF.PSM, m_zaddr)) return; + // if(vz <= (m_mem.*m_context->ztbl->rpa)(m_zaddr)) return; + break; + default: + __assume(0); + } + + union + { + struct {Vertex::Vector Cf, Cd, Ca;}; + struct {Vertex::Vector Cfda[3];}; + }; + + Cf = v.c; + + if(PRIM->TME) + { + (this->*m_pDrawVertexTFX)(Cf, v); + } + + if(PRIM->FGE) + { + Vertex::Scalar a = Cf.a; + Vertex::Vector Cfog((DWORD)m_env.FOGCOL.ai32[0]); + Cf = Cfog + (Cf - Cfog) * v.t.z; + Cf.a = a; + } + + BOOL ZMSK = m_context->ZBUF.ZMSK; + DWORD FBMSK = m_context->FRAME.FBMSK; + + bool fAlphaPass = true; + + BYTE Af = (BYTE)(int)Cf.a; + + switch(iATST) + { + case 0: fAlphaPass = false; break; + case 1: fAlphaPass = true; break; + case 2: fAlphaPass = Af < m_context->TEST.AREF; break; + case 3: fAlphaPass = Af <= m_context->TEST.AREF; break; + case 4: fAlphaPass = Af == m_context->TEST.AREF; break; + case 5: fAlphaPass = Af >= m_context->TEST.AREF; break; + case 6: fAlphaPass = Af > m_context->TEST.AREF; break; + case 7: fAlphaPass = Af != m_context->TEST.AREF; break; + default: __assume(0); + } + + if(!fAlphaPass) + { + switch(m_context->TEST.AFAIL) + { + case 0: return; + case 1: ZMSK = 1; break; // RGBA + case 2: FBMSK = 0xffffffff; break; // Z + case 3: FBMSK = 0xff000000; ZMSK = 1; break; // RGB + default: __assume(0); + } + } + + if(!ZMSK) + { + if(iZTST != 2 && iZTST != 3) vz = v.GetZ(); + m_mem.writePixelX(m_context->ZBUF.PSM, m_zaddr, vz); + // (m_mem.*m_context->ztbl->wpa)(m_zaddr, vz); + } + + if(FBMSK != ~0) + { + if(m_context->TEST.DATE && m_context->FRAME.PSM <= PSM_PSMCT16S && m_context->FRAME.PSM != PSM_PSMCT24) + { + DWORD c = m_mem.readPixelX(m_context->FRAME.PSM, m_faddr); + // DWORD c = (m_mem.*m_context->ftbl->rpa)(m_faddr); + BYTE A = (BYTE)(c >> (m_context->FRAME.PSM == PSM_PSMCT32 ? 
31 : 15)); + if(A ^ m_context->TEST.DATM) return; + } + + // FIXME: for AA1 the value of Af should be calculated from the pixel coverage... + + bool fABE = (PRIM->ABE || PRIM->AA1 && (PRIM->PRIM == 1 || PRIM->PRIM == 2)) && (!m_env.PABE.PABE || (int)Cf.a >= 0x80); + + if(FBMSK || fABE) + { + GIFRegTEXA TEXA; + /* + TEXA.AEM = 0; + TEXA.TA0 = 0; + TEXA.TA1 = 0x80; + */ + TEXA.ai32[0] = 0; + TEXA.ai32[1] = 0x80; + + Cd = m_mem.readTexelX(m_context->FRAME.PSM, m_faddr, TEXA); + // Cd = (m_mem.*m_context->ftbl->rta)(m_faddr, TEXA); + } + + if(fABE) + { + Ca = Vertex::Vector(Vertex::Scalar(0)); + Ca.a = Vertex::Scalar((int)m_context->ALPHA.FIX); + + Vertex::Scalar a = Cf.a; + Cf = ((Cfda[m_context->ALPHA.A] - Cfda[m_context->ALPHA.B]) * Cfda[m_context->ALPHA.C].a >> 7) + Cfda[m_context->ALPHA.D]; + Cf.a = a; + } + + DWORD Cdw; + + if(m_env.COLCLAMP.CLAMP && !m_env.DTHE.DTHE) + { + Cdw = Cf; + } + else + { + __declspec(align(16)) union {struct {short Rf, Gf, Bf, Af;}; UINT64 Cui64;}; + Cui64 = Cf; + + if(m_env.DTHE.DTHE) + { + short DMxy = (signed char)((*((WORD*)&m_env.DIMX.i64 + (m_fy&3)) >> ((m_fx&3)<<2)) << 5) >> 5; + Rf = (short)(Rf + DMxy); + Gf = (short)(Gf + DMxy); + Bf = (short)(Bf + DMxy); + } + + Rf = m_clamp[Rf]; + Gf = m_clamp[Gf]; + Bf = m_clamp[Bf]; + Af |= m_context->FBA.FBA << 7; + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + __m128i r0 = _mm_load_si128((__m128i*)&Cui64); + Cdw = (DWORD)_mm_cvtsi128_si32(_mm_packus_epi16(r0, r0)); +#else + Cdw = ((DWORD)(Rf&0xff) << 0) + | ((DWORD)(Gf&0xff) << 8) + | ((DWORD)(Bf&0xff) << 16) + | ((DWORD)(Af&0xff) << 24); +#endif + } + + if(FBMSK != 0) + { + Cdw = (Cdw & ~FBMSK) | ((DWORD)Cd & FBMSK); + } + + m_mem.writeFrameX(m_context->FRAME.PSM, m_faddr, Cdw); + // (m_mem.*m_context->ftbl->wfa)(m_faddr, Cdw); + } +} + +static const float s_one_over_log2 = 1.0f / log(2.0f); + +template +template +void GSRendererSW::DrawVertexTFX(typename Vertex::Vector& Cf, const Vertex& v) +{ + ASSERT(PRIM->TME); + + Vertex::Vector t = 
v.t; + + bool fBiLinear = iLOD == 2; + + if(iLOD == 3) + { + fBiLinear = !!bLCM; + } + else + { + t.q.rcp(); + t *= t.q; + + if(iLOD == 1) + { + float lod = (float)(int)m_context->TEX1.K; + if(!bLCM) lod += log(fabs((float)t.q)) * s_one_over_log2 * (1 << m_context->TEX1.L); + fBiLinear = lod <= 0 && (m_context->TEX1.MMAG & 1) || lod > 0 && (m_context->TEX1.MMIN & 1); + } + } + + if(fBiLinear) t -= Vertex::Scalar(0.5f); + + __declspec(align(16)) short ituv[8] = + { + (short)(int)t.x, + (short)(int)t.x+1, + (short)(int)t.y, + (short)(int)t.y+1 + }; + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + + __m128i uv = _mm_load_si128((__m128i*)ituv); + __m128i mask = _mm_load_si128((__m128i*)m_uv->mask); + __m128i region = _mm_or_si128(_mm_and_si128(uv, *(__m128i*)m_uv->and), *(__m128i*)m_uv->or); + __m128i clamp = _mm_min_epi16(_mm_max_epi16(uv, *(__m128i*)m_uv->min), *(__m128i*)m_uv->max); + _mm_store_si128((__m128i*)ituv, _mm_or_si128(_mm_and_si128(region, mask), _mm_andnot_si128(mask, clamp))); + +#else + + for(int i = 0; i < 4; i++) + { + short region = (ituv[i] & m_uv->and[i]) | m_uv->or[i]; + short clamp = ituv[i] < m_uv->min[i] ? m_uv->min[i] : ituv[i] > m_uv->max[i] ? 
m_uv->max[i] : ituv[i]; + ituv[i] = (region & m_uv->mask[i]) | (clamp & ~m_uv->mask[i]); + } + +#endif + + Vertex::Vector Ct[4]; + + if(fBiLinear) + { + for(int i = 0; i < 4; i++) + { + Ct[i] = m_mem.readTexelX(m_context->TEX0.PSM, ituv[i&1], ituv[2+(i>>1)], m_context->TEX0, m_env.TEXA); + // Ct[i] = (m_mem.*m_context->ttbl->rt)(ituv[i&1], ituv[2+(i>>1)], m_context->TEX0, m_env.TEXA); + // Ct[i] = m_pTexture[(ituv[2+(i>>1)] << m_context->TEX0.TW) + ituv[i&1]]; + } + + Vertex::Vector ft = t - t.floor(); + + Ct[0] = Ct[0] + (Ct[1] - Ct[0]) * ft.x; + Ct[2] = Ct[2] + (Ct[3] - Ct[2]) * ft.x; + Ct[0] = Ct[0] + (Ct[2] - Ct[0]) * ft.y; + } + else + { + Ct[0] = m_mem.readTexelX(m_context->TEX0.PSM, ituv[0], ituv[2], m_context->TEX0, m_env.TEXA); + // Ct[0] = (m_mem.*m_context->ttbl->rt)(ituv[0], ituv[2], m_context->TEX0, m_env.TEXA); + // Ct[0] = m_pTexture[(ituv[2] << m_context->TEX0.TW) + ituv[0]]; + } + + Vertex::Scalar a = Cf.a; + + switch(iTFX) + { + case 0: + Cf = (Cf * Ct[0] >> 7); + if(!bTCC) Cf.a = a; + break; + case 1: + Cf = Ct[0]; + break; + case 2: + Cf = (Cf * Ct[0] >> 7) + Cf.a; + Cf.a = !bTCC ? a : (Ct[0].a + a); + break; + case 3: + Cf = (Cf * Ct[0] >> 7) + Cf.a; + Cf.a = !bTCC ? 
a : Ct[0].a; + break; + default: + __assume(0); + } + + Cf.sat(); +} + +/* SetupTexture: loads the 32-bit CLUT for the current TEX0/TEXA and fills m_uv with the texture-coordinate wrap parameters taken from CLAMP. Entries 0 and 1 describe U (from WMS), entries 2 and 3 describe V (from WMT); each pair holds identical values because the texel fetch processes x, x+1, y, y+1 together. mask = 0xffff makes the fetch keep the (coord & and) | or result, mask = 0 makes it keep the min/max clamped result. min/and and max/or share storage through the union in uv_wrap_t. */ +template +void GSRendererSW::SetupTexture() +{ + m_mem.SetupCLUT32(m_context->TEX0, m_env.TEXA); + + // + + int tw = 1 << m_context->TEX0.TW; + int th = 1 << m_context->TEX0.TH; + + /* WMS 0: and with tw-1; 1: clamp to [0, tw-1]; 2: clamp to [MINU, MAXU]; 3: and with MINU, then or with MAXU */ + switch(m_context->CLAMP.WMS) + { + case 0: m_uv->and[0] = (short)(tw-1); m_uv->or[0] = 0; m_uv->mask[0] = 0xffff; break; + case 1: m_uv->min[0] = 0; m_uv->max[0] = (short)(tw-1); m_uv->mask[0] = 0; break; + case 2: m_uv->min[0] = (short)m_context->CLAMP.MINU; m_uv->max[0] = (short)m_context->CLAMP.MAXU; m_uv->mask[0] = 0; break; + case 3: m_uv->and[0] = (short)m_context->CLAMP.MINU; m_uv->or[0] = (short)m_context->CLAMP.MAXU; m_uv->mask[0] = 0xffff; break; + default: __assume(0); + } + + m_uv->and[1] = m_uv->and[0]; + m_uv->or[1] = m_uv->or[0]; + m_uv->min[1] = m_uv->min[0]; + m_uv->max[1] = m_uv->max[0]; + m_uv->mask[1] = m_uv->mask[0]; + + /* same four modes for V, using th and MINV/MAXV */ + switch(m_context->CLAMP.WMT) + { + case 0: m_uv->and[2] = (short)(th-1); m_uv->or[2] = 0; m_uv->mask[2] = 0xffff; break; + case 1: m_uv->min[2] = 0; m_uv->max[2] = (short)(th-1); m_uv->mask[2] = 0; break; + case 2: m_uv->min[2] = (short)m_context->CLAMP.MINV; m_uv->max[2] = (short)m_context->CLAMP.MAXV; m_uv->mask[2] = 0; break; + case 3: m_uv->and[2] = (short)m_context->CLAMP.MINV; m_uv->or[2] = (short)m_context->CLAMP.MAXV; m_uv->mask[2] = 0xffff; break; + default: __assume(0); + } + + m_uv->and[3] = m_uv->and[2]; + m_uv->or[3] = m_uv->or[2]; + m_uv->min[3] = m_uv->min[2]; + m_uv->max[3] = m_uv->max[2]; + m_uv->mask[3] = m_uv->mask[2]; +} + +// +// GSRendererSWFP +// + +/* Constructor: forwards all arguments to the GSRendererSW base constructor and adds no state of its own. */ +GSRendererSWFP::GSRendererSWFP(BYTE* base, bool mt, void (*irq)(), bool nloophack) + : GSRendererSW(base, mt, irq, nloophack) +{ +} + +/* VertexKick: appends a GSVertexSWFP to m_vl and fills it from the current vertex registers in m_v, then calls the base-class VertexKick. The position is XYZ minus XYOFFSET scaled by 1/16, and z is then overwritten with XYZ.Z converted to float. */ +void GSRendererSWFP::VertexKick(bool skip) +{ + GSVertexSWFP& v = m_vl.AddTail(); + + v.p.x = (int)m_v.XYZ.X - (int)m_context->XYOFFSET.OFX; + v.p.y = (int)m_v.XYZ.Y - (int)m_context->XYOFFSET.OFY; + v.p *= GSVertexSWFP::Scalar(1.0f / 16); + v.p.z = (float)m_v.XYZ.Z; + //v.p.z = 
(float)(m_v.XYZ.Z >> 16); + //v.p.q = (float)(m_v.XYZ.Z & 0xffff); + + v.c = (DWORD)m_v.RGBAQ.ai32[0]; + + if(PRIM->FGE) + { + v.t.z = (float)m_v.FOG.F * (1.0f / 255); + } + + if(PRIM->TME) + { + if(PRIM->FST) + { + v.t.x = (float)(int)m_v.UV.U; + v.t.y = (float)(int)m_v.UV.V; + v.t *= GSVertexSWFP::Scalar(1.0f / 16); + v.t.q = 1.0f; + } + else + { + v.t.x = m_v.ST.S * (1 << m_context->TEX0.TW); + v.t.y = m_v.ST.T * (1 << m_context->TEX0.TH); + v.t.q = m_v.RGBAQ.Q; + } + } + + __super::VertexKick(skip); +} + +#pragma warning(pop) diff --git a/gsdx10/GSRendererSW.h b/gsdx10/GSRendererSW.h new file mode 100644 index 0000000..3f4141c --- /dev/null +++ b/gsdx10/GSRendererSW.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSRenderer.h" +#include "GSVertexSW.h" + +/* Software renderer declaration. Members visible here: two GSTexture2D objects, address/row state for the frame and Z buffers (m_faddr*, m_zaddr*, m_fx, m_fy), tables of member-function pointers (m_dv, m_dvtfx) with the currently selected entries (m_pDrawVertex, m_pDrawVertexTFX), the texture-coordinate wrap table m_uv, and scissor/clip/mask data. NOTE(review): the template parameter lists are missing from this text; the code uses a type named Vertex. */ +template +class GSRendererSW : public GSRendererT +{ +protected: + GSTexture2D m_texture[2]; + + void DrawingKick(bool skip); + void Draw(); + void Flip(); + + DWORD m_faddr_x0, m_faddr; + DWORD m_zaddr_x0, m_zaddr; + int* m_faddr_ro; + int* m_zaddr_ro; + int m_fx, m_fy; + void RowInit(int x, int y); + void RowStep(); + + void DrawPoint(Vertex* v); + void DrawLine(Vertex* v); + void DrawTriangle(Vertex* v); + void DrawSprite(Vertex* v); + bool DrawFilledRect(int left, int top, int right, int bottom, const Vertex& v); + + template + void DrawVertex(const Vertex& v); + + typedef void (GSRendererSW::*DrawVertexPtr)(const Vertex& v); + DrawVertexPtr m_dv[4][8], m_pDrawVertex; + + template + void DrawVertexTFX(typename Vertex::Vector& Cf, const Vertex& v); + + typedef void (GSRendererSW::*DrawVertexTFXPtr)(typename Vertex::Vector& Cf, const Vertex& v); + DrawVertexTFXPtr m_dvtfx[4][2][2][4], m_pDrawVertexTFX; + + void SetupTexture(); + + /* wrap table: min/max (clamp modes) and and/or (region modes) occupy the same storage through the anonymous union; mask is stored separately */ + struct uv_wrap_t {union {struct {short min[8], max[8];}; struct {short and[8], or[8];};}; unsigned short mask[8];}* m_uv; + + CRect m_scissor; + BYTE m_clip[65536]; + BYTE m_mask[65536]; + BYTE* m_clamp; + +public: + GSRendererSW(BYTE* base, bool mt, void (*irq)(), bool nloophack); + virtual ~GSRendererSW(); +}; + +/* Renderer derived from GSRendererSW that supplies its own VertexKick. */ +class GSRendererSWFP : public GSRendererSW +{ +protected: + void VertexKick(bool skip); + +public: + GSRendererSWFP(BYTE* base, bool mt, void (*irq)(), bool nloophack); +}; +/* +class GSRendererSWFX : public GSRendererSW +{ +protected: + void VertexKick(bool skip); + +public: + GSRendererSWFX(); +}; +*/ \ No newline at end of file diff --git a/gsdx10/GSSettingsDlg.cpp b/gsdx10/GSSettingsDlg.cpp new file mode 100644 index 0000000..b00444c --- /dev/null +++ b/gsdx10/GSSettingsDlg.cpp @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSSettingsDlg.h" +#include + +/* Option tables for the settings dialog combo boxes. Each entry is {id, display name, optional note}; the id is what gets stored in the profile. */ +GSSetting g_renderers[] = +{ + {0, _T("Direct3D10"), NULL}, + {1, _T("Software"), NULL}, + {2, _T("Do not render"), NULL}, +}; + +GSSetting g_interlace[] = +{ + {0, _T("None"), NULL}, + {1, _T("Weave tff"), _T("saw-tooth")}, + {2, _T("Weave bff"), _T("saw-tooth")}, + {3, _T("Bob tff"), _T("use blend if shaking")}, + {4, _T("Bob bff"), _T("use blend if shaking")}, + {5, _T("Blend tff"), _T("slight blur, 1/2 fps")}, + {6, _T("Blend bff"), _T("slight blur, 1/2 fps")}, +}; + +GSSetting g_aspectratio[] = +{ + {0, _T("Stretch"), NULL}, + {1, _T("4:3"), NULL}, + {2, _T("16:9"), NULL}, +}; + +/* Constructor: sets the initial values of the check-box members (m_filter = 1, m_nloophack = 2, m_nativeres and m_vsync = FALSE). */ +IMPLEMENT_DYNAMIC(GSSettingsDlg, CDialog) +GSSettingsDlg::GSSettingsDlg(CWnd* pParent /*=NULL*/) + : CDialog(GSSettingsDlg::IDD, pParent) + , m_filter(1) + , m_nloophack(2) + , m_nativeres(FALSE) + , m_vsync(FALSE) +{ +} + +GSSettingsDlg::~GSSettingsDlg() +{ +} + +/* InitComboBox: adds every setting whose id is <= maxid to combobox, labelled with its name or with name (note) when a note is present, stores the id as item data and selects the item whose id equals sel. */ +void GSSettingsDlg::InitComboBox(CComboBox& combobox, const GSSetting* settings, int count, DWORD sel, DWORD maxid) +{ + for(int i = 0; i < count; i++) + { + if(settings[i].id <= maxid) + { + CString str = settings[i].name; + if(settings[i].note != NULL) str = str + _T(" (") + settings[i].note + _T(")"); + int item = combobox.AddString(str); + 
combobox.SetItemData(item, settings[i].id); + if(settings[i].id == sel) combobox.SetCurSel(item); + } + } +} + +/* DoDataExchange: attaches the dialog controls to their member objects (DDX_Control) and exchanges the four check boxes with m_filter, m_nloophack, m_nativeres and m_vsync (DDX_Check). */ +void GSSettingsDlg::DoDataExchange(CDataExchange* pDX) +{ + CDialog::DoDataExchange(pDX); + DDX_Control(pDX, IDC_COMBO3, m_resolution); + DDX_Control(pDX, IDC_COMBO1, m_renderer); + DDX_Control(pDX, IDC_COMBO2, m_interlace); + DDX_Control(pDX, IDC_COMBO5, m_aspectratio); + DDX_Check(pDX, IDC_CHECK4, m_filter); + DDX_Check(pDX, IDC_CHECK6, m_nloophack); + DDX_Control(pDX, IDC_SPIN1, m_resx); + DDX_Control(pDX, IDC_SPIN2, m_resy); + DDX_Check(pDX, IDC_CHECK1, m_nativeres); + DDX_Control(pDX, IDC_EDIT1, m_resxedit); + DDX_Control(pDX, IDC_EDIT2, m_resyedit); + DDX_Check(pDX, IDC_CHECK2, m_vsync); +} + +BEGIN_MESSAGE_MAP(GSSettingsDlg, CDialog) + ON_BN_CLICKED(IDC_CHECK1, &GSSettingsDlg::OnBnClickedCheck1) +END_MESSAGE_MAP() + +// GSSettingsDlg message handlers + +/* OnInitDialog: clears m_modes, adds a zero-filled DXGI_MODE_DESC as the Windowed entry of the resolution combo box (the item data is the list position) and selects it. The fullscreen mode enumeration that follows is commented out. */ +BOOL GSSettingsDlg::OnInitDialog() +{ + __super::OnInitDialog(); + + CWinApp* pApp = AfxGetApp(); + + m_modes.RemoveAll(); + + // windowed + + DXGI_MODE_DESC mode; + memset(&mode, 0, sizeof(mode)); + m_modes.AddTail(mode); + + int iItem = m_resolution.AddString(_T("Windowed")); + m_resolution.SetItemDataPtr(iItem, m_modes.GetTailPosition()); + m_resolution.SetCurSel(iItem); + + // fullscreen +/* + CComPtr dev; + + if(SUCCEEDED(D3D10CreateDevice(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &dev))) + { + // DXGI_MODE_DESC + + int ModeWidth = pApp->GetProfileInt(_T("Settings"), _T("ModeWidth"), 0); + int ModeHeight = pApp->GetProfileInt(_T("Settings"), _T("ModeHeight"), 0); + int ModeRefreshRate = pApp->GetProfileInt(_T("Settings"), _T("ModeRefreshRate"), 0); + + UINT nModes = pD3D->GetAdapterModeCount(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8); + + for(UINT i = 0; i < nModes; i++) + { + D3DDISPLAYMODE mode; + + if(S_OK == pD3D->EnumAdapterModes(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8, i, &mode)) + { + CString str; + str.Format(_T("%dx%d %dHz"), mode.Width, mode.Height, mode.RefreshRate); + int 
iItem = m_resolution.AddString(str); + + m_modes.AddTail(mode); + m_resolution.SetItemDataPtr(iItem, m_modes.GetTailPosition()); + + if(ModeWidth == mode.Width && ModeHeight == mode.Height && ModeRefreshRate == mode.RefreshRate) + { + m_resolution.SetCurSel(iItem); + } + } + } + + pD3D->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &caps); + } +*/ + + InitComboBox(m_renderer, g_renderers, countof(g_renderers), pApp->GetProfileInt(_T("Settings"), _T("renderer"), 0)); + InitComboBox(m_interlace, g_interlace, countof(g_interlace), pApp->GetProfileInt(_T("Settings"), _T("interlace"), 0)); + InitComboBox(m_aspectratio, g_aspectratio, countof(g_aspectratio), pApp->GetProfileInt(_T("Settings"), _T("aspectratio"), 1)); + + // + + m_filter = pApp->GetProfileInt(_T("Settings"), _T("filter"), 1); + m_nloophack = pApp->GetProfileInt(_T("Settings"), _T("nloophack"), 2); + m_vsync = !!pApp->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); + + m_resx.SetRange(512, 4096); + m_resy.SetRange(512, 4096); + m_resx.SetPos(pApp->GetProfileInt(_T("Settings"), _T("resx"), 1024)); + m_resy.SetPos(pApp->GetProfileInt(_T("Settings"), _T("resy"), 1024)); + m_nativeres = !!pApp->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE); + + m_resx.EnableWindow(!m_nativeres); + m_resy.EnableWindow(!m_nativeres); + m_resxedit.EnableWindow(!m_nativeres); + m_resyedit.EnableWindow(!m_nativeres); + + // + + UpdateData(FALSE); + + return TRUE; // return TRUE unless you set the focus to a control + // EXCEPTION: OCX Property Pages should return FALSE +} + +void GSSettingsDlg::OnOK() +{ + CWinApp* pApp = AfxGetApp(); + + UpdateData(); + + if(m_resolution.GetCurSel() >= 0) + { + const DXGI_MODE_DESC& mode = m_modes.GetAt((POSITION)m_resolution.GetItemData(m_resolution.GetCurSel())); + + pApp->WriteProfileInt(_T("Settings"), _T("ModeWidth"), mode.Width); + pApp->WriteProfileInt(_T("Settings"), _T("ModeHeight"), mode.Height); + pApp->WriteProfileInt(_T("Settings"), _T("ModeRefreshRateNumerator"), 
mode.RefreshRate.Numerator); + pApp->WriteProfileInt(_T("Settings"), _T("ModeRefreshRateDenominator"), mode.RefreshRate.Denominator); + } + + /* each combo box selection is saved as the item data of the selected entry, and only when an entry is selected */ + if(m_renderer.GetCurSel() >= 0) + { + pApp->WriteProfileInt(_T("Settings"), _T("renderer"), (DWORD)m_renderer.GetItemData(m_renderer.GetCurSel())); + } + + if(m_interlace.GetCurSel() >= 0) + { + pApp->WriteProfileInt(_T("Settings"), _T("interlace"), (DWORD)m_interlace.GetItemData(m_interlace.GetCurSel())); + } + + if(m_aspectratio.GetCurSel() >= 0) + { + pApp->WriteProfileInt(_T("Settings"), _T("aspectratio"), (DWORD)m_aspectratio.GetItemData(m_aspectratio.GetCurSel())); + } + + pApp->WriteProfileInt(_T("Settings"), _T("filter"), m_filter); + pApp->WriteProfileInt(_T("Settings"), _T("nloophack"), m_nloophack); + pApp->WriteProfileInt(_T("Settings"), _T("vsync"), m_vsync); + + pApp->WriteProfileInt(_T("Settings"), _T("resx"), m_resx.GetPos()); + pApp->WriteProfileInt(_T("Settings"), _T("resy"), m_resy.GetPos()); + pApp->WriteProfileInt(_T("Settings"), _T("nativeres"), m_nativeres); + + __super::OnOK(); +} + +/* OnBnClickedCheck1: click handler for IDC_CHECK1, the check box exchanged with m_nativeres. Re-reads the controls and enables the resolution spin and edit controls only while m_nativeres is off. */ +void GSSettingsDlg::OnBnClickedCheck1() +{ + UpdateData(); + + m_resx.EnableWindow(!m_nativeres); + m_resy.EnableWindow(!m_nativeres); + m_resxedit.EnableWindow(!m_nativeres); + m_resyedit.EnableWindow(!m_nativeres); +} diff --git a/gsdx10/GSSettingsDlg.h b/gsdx10/GSSettingsDlg.h new file mode 100644 index 0000000..41d3030 --- /dev/null +++ b/gsdx10/GSSettingsDlg.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "resource.h" +#include "afxwin.h" +#include "afxcmn.h" + +/* One selectable option of a settings combo box: numeric id, display name, and an optional note (NULL when absent) that is shown in parentheses after the name. */ +struct GSSetting {DWORD id; const TCHAR* name; const TCHAR* note;}; + +extern GSSetting g_renderers[]; +extern GSSetting g_interlace[]; +extern GSSetting g_aspectratio[]; + +/* Configuration dialog (resource IDD_CONFIG): combo boxes for resolution, renderer, interlace mode and aspect ratio, check boxes for filter, nloophack, native resolution and vsync, and spin/edit pairs for the custom resolution. */ +class GSSettingsDlg : public CDialog +{ + DECLARE_DYNAMIC(GSSettingsDlg) + +private: + /* display modes offered in m_resolution; each combo item stores a position in this list. NOTE(review): the element type is missing from this text. */ + CAtlList m_modes; + + void InitComboBox(CComboBox& combobox, const GSSetting* settings, int count, DWORD sel, DWORD maxid = ~0); + +public: + GSSettingsDlg(CWnd* pParent = NULL); // standard constructor + virtual ~GSSettingsDlg(); + +// Dialog Data + enum { IDD = IDD_CONFIG }; + CComboBox m_resolution; + CComboBox m_renderer; + CComboBox m_interlace; + CComboBox m_aspectratio; + int m_filter; + int m_nloophack; + CSpinButtonCtrl m_resx; + CSpinButtonCtrl m_resy; + BOOL m_nativeres; + CEdit m_resxedit; + CEdit m_resyedit; + BOOL m_vsync; + +protected: + virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support + virtual BOOL OnInitDialog(); + virtual void OnOK(); + + DECLARE_MESSAGE_MAP() + +public: + afx_msg void OnBnClickedCheck1(); +}; + diff --git a/gsdx10/GSTexture.cpp b/gsdx10/GSTexture.cpp new file mode 100644 index 0000000..10b0c0c --- /dev/null +++ b/gsdx10/GSTexture.cpp @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTextureCache.h" +#include "GSRendererHW.h" + +/* Constructor: empty valid rectangle, both bpp fields zero, not rendered, CLUT copy zero-filled. */ +GSTextureCache::GSTexture::GSTexture(GSTextureCache* tc) + : GSSurface(tc) + , m_valid(0, 0, 0, 0) + , m_bpp(0) + , m_bpp2(0) + , m_rendered(false) +{ + memset(m_clut, 0, sizeof(m_clut)); +} + +/* Create: captures TEX0 and CLAMP from the renderer's current context and creates a device texture of (1 << TW) x (1 << TH). For the paletted formats PSMT8, PSMT8H, PSMT4, PSMT4HL and PSMT4HH the device format is chosen from CPSM instead of PSM. Returns whether CreateTexture succeeded. */ +bool GSTextureCache::GSTexture::Create() +{ + // m_tc->m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1); + + HRESULT hr; + + m_TEX0 = m_tc->m_renderer->m_context->TEX0; + m_CLAMP = m_tc->m_renderer->m_context->CLAMP; + + DWORD psm = m_TEX0.PSM; + + switch(psm) + { + case PSM_PSMT8: + case PSM_PSMT8H: + case PSM_PSMT4: + case PSM_PSMT4HL: + case PSM_PSMT4HH: + psm = m_TEX0.CPSM; + break; + } + + DXGI_FORMAT format; + + switch(psm) + { + default: + ASSERT(0); + /* no break: an unexpected format continues into the PSMCT32 case */ + case PSM_PSMCT32: + m_bpp = 32; + m_bpp2 = 0; + format = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case PSM_PSMCT24: + m_bpp = 32; + m_bpp2 = 1; + format = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + /* NOTE(review): m_bpp2 is 3 here but Create(GSRenderTarget*) uses 2 for the same 16-bit formats - confirm the difference is intended */ + case PSM_PSMCT16: + case PSM_PSMCT16S: + m_bpp = 16; + m_bpp2 = 3; + format = DXGI_FORMAT_R16_UNORM; + break; + } + + int w = 1 << m_TEX0.TW; + int h = 1 << m_TEX0.TH; + + hr = m_tc->m_renderer->m_dev.CreateTexture(m_texture, w, h, format); + + return SUCCEEDED(hr); +} + +/* Create(rt): builds this texture from render target rt. Calls rt->Update() first, takes the scale from rt and TEX0/CLAMP from the renderer's current context, and marks the texture as rendered. */ +bool GSTextureCache::GSTexture::Create(GSRenderTarget* rt) +{ + rt->Update(); + + // m_tc->m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1); + + HRESULT hr; + + m_scale = rt->m_scale; + m_TEX0 = m_tc->m_renderer->m_context->TEX0; + m_CLAMP = 
m_tc->m_renderer->m_context->CLAMP; + m_rendered = true; + + int tw = 1 << m_TEX0.TW; + int th = 1 << m_TEX0.TH; + int tp = (int)m_TEX0.TW << 6; + + int w = (int)(m_scale.x * tw + 0.5f); + int h = (int)(m_scale.y * th + 0.5f); + + // pitch conversion + + if(rt->m_TEX0.TBW != m_TEX0.TBW) // && rt->m_TEX0.PSM == m_TEX0.PSM + { + // sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left) + + // ASSERT(rt->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) + + hr = m_tc->m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.m_desc.Width, rt->m_texture.m_desc.Height); + + int bw = 64; + int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64; + + int sw = (int)rt->m_TEX0.TBW << 6; + + int dw = (int)m_TEX0.TBW << 6; + int dh = 1 << m_TEX0.TH; + + for(int dy = 0; dy < dh; dy += bh) + { + for(int dx = 0; dx < dw; dx += bw) + { + int o = dy * dw / bh + dx; + + int sx = o % sw; + int sy = o / sw; + + D3DXVECTOR4 src, dst; + + src.x = m_scale.x * sx / rt->m_texture.m_desc.Width; + src.y = m_scale.y * sy / rt->m_texture.m_desc.Height; + src.z = m_scale.x * (sx + bw) / rt->m_texture.m_desc.Width; + src.w = m_scale.y * (sy + bh) / rt->m_texture.m_desc.Height; + + dst.x = m_scale.x * dx; + dst.y = m_scale.y * dy; + dst.z = m_scale.x * (dx + bw); + dst.w = m_scale.y * (dy + bh); + + m_tc->m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst); + + // TODO: this is quite a lot of StretchRect, do it with one Draw + } + } + } + else if(tw < tp) + { + // FIXME: timesplitters blurs the render target by blending itself over a couple of times + + if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00)) + { + return false; + } + + // TODO + } + + // width/height conversion + + if(w != rt->m_texture.m_desc.Width || h != rt->m_texture.m_desc.Height) + { + D3DXVECTOR4 dst(0, 0, w, h); + + if(w > rt->m_texture.m_desc.Width) + { + float scale = m_scale.x; + 
m_scale.x = (float)rt->m_texture.m_desc.Width / tw; + dst.z = (float)rt->m_texture.m_desc.Width * m_scale.x / scale; + w = rt->m_texture.m_desc.Width; + } + + if(h > rt->m_texture.m_desc.Height) + { + float scale = m_scale.y; + m_scale.y = (float)rt->m_texture.m_desc.Height / th; + dst.w = (float)rt->m_texture.m_desc.Height * m_scale.y / scale; + h = rt->m_texture.m_desc.Height; + } + + D3DXVECTOR4 src(0, 0, w, h); + + GSTexture2D* st; + GSTexture2D* dt; + GSTexture2D tmp; + + if(!m_texture) + { + st = &rt->m_texture; + dt = &m_texture; + } + else + { + st = &m_texture; + dt = &tmp; + } + + hr = m_tc->m_renderer->m_dev.CreateRenderTarget(*dt, w, h); + + if(src == dst) + { + D3D10_BOX box = {0, 0, 0, w, h, 1}; + + m_tc->m_renderer->m_dev->CopySubresourceRegion(*dt, 0, 0, 0, 0, *st, 0, &box); + } + else + { + src.z /= st->m_desc.Width; + src.w /= st->m_desc.Height; + + m_tc->m_renderer->m_dev.StretchRect(*st, src, *dt, dst); + } + + if(tmp) + { + m_tc->m_renderer->m_dev.Recycle(m_texture); + + m_texture = tmp; + } + } + + if(!m_texture) + { + hr = m_tc->m_renderer->m_dev.CreateTexture(m_texture, rt->m_texture.m_desc.Width, rt->m_texture.m_desc.Height); + + m_tc->m_renderer->m_dev->CopyResource(m_texture, rt->m_texture); + } + + switch(m_TEX0.PSM) + { + case PSM_PSMCT32: + m_bpp2 = 0; + break; + case PSM_PSMCT24: + m_bpp2 = 1; + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + m_bpp2 = 2; + break; + case PSM_PSMT8H: + m_bpp2 = 4; + hr = m_tc->m_renderer->m_dev.CreateTexture(m_palette, 256, 1, m_TEX0.CPSM == PSM_PSMCT32 ? 
DXGI_FORMAT_R8G8B8A8_UNORM : DXGI_FORMAT_R16_UNORM); // + break; + case PSM_PSMT4HL: + case PSM_PSMT4HH: + ASSERT(0); // TODO + break; + } + + return true; +} + +/* Create(ds): creating a texture from a depth stencil is not implemented; sets m_rendered and returns false. */ +bool GSTextureCache::GSTexture::Create(GSDepthStencil* ds) +{ + m_rendered = true; + + // TODO + + return false; +} + +/* Update: after the base-class Update, re-reads the rectangle reported by GetDirtyRect from local memory through the member-function pointer rt and uploads it with UpdateSubresource, then merges the rectangle into m_valid and clears the dirty list. Returns early for rendered textures and when GetDirtyRect returns false. */ +void GSTextureCache::GSTexture::Update(GSLocalMemory::readTexture rt) +{ + __super::Update(); + + if(m_rendered) + { + return; + } + + CRect r; + + if(!GetDirtyRect(r)) + { + return; + } + + /* function-local static: one 4 MB staging buffer allocated on the first call and never freed. NOTE(review): the allocation result is not checked, and the fixed 1024-pixel pitch presumably limits the supported texture size - confirm */ + static BYTE* buff = (BYTE*)::_aligned_malloc(1024 * 1024 * 4, 16); + + int pitch = 1024 * m_bpp >> 3; + + BYTE* bits = buff + pitch * r.top + (r.left * m_bpp >> 3); + + (m_tc->m_renderer->m_mem.*rt)(r, bits, pitch, m_tc->m_renderer->m_context->TEX0, m_tc->m_renderer->m_env.TEXA, m_tc->m_renderer->m_context->CLAMP); + + D3D10_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1}; + + m_tc->m_renderer->m_dev->UpdateSubresource(m_texture, 0, &box, bits, pitch, 0); + + // m_tc->m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * m_bpp >> 3); + + CRect r2 = m_valid & r; + + if(!r2.IsRectEmpty()) + { + // m_tc->m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle2, r2.Width() * r2.Height() * m_bpp >> 3); + } + + m_valid |= r; + m_dirty.RemoveAll(); + + // m_tc->m_renderer->m_perfmon.Put(GSPerfMon::Texture, r.Width() * r.Height() * m_bpp >> 3); +} + +/* GetDirtyRect: computes in r the rectangle that has to be refreshed. r starts as the full texture rectangle passed through the renderer's MinMaxUV and is then combined with the accumulated dirty rectangle (clipped to the device texture size) and with m_valid. Returns false when nothing is dirty inside an already valid area or when r ends up empty. */ +bool GSTextureCache::GSTexture::GetDirtyRect(CRect& r) +{ + int w = 1 << m_TEX0.TW; + int h = 1 << m_TEX0.TH; + + r.SetRect(0, 0, w, h); + + m_tc->m_renderer->MinMaxUV(w, h, r); + + CRect dirty = m_dirty.GetDirtyRect(m_TEX0); + CRect valid = m_valid; + + dirty &= CRect(0, 0, m_texture.m_desc.Width, m_texture.m_desc.Height); + + if(IsRectInRect(r, valid)) + { + if(dirty.IsRectEmpty()) return false; + else if(IsRectInRect(dirty, r)) r = dirty; + else if(IsRectInRect(dirty, valid)) r |= dirty; + else r = valid & dirty; + } + else if(IsRectInRectH(r, valid) && (r.left >= valid.left || r.right <= valid.right)) + { + r.top = valid.top; + r.bottom = 
valid.bottom; + if(r.left < valid.left) r.right = valid.left; + else /*if(r.right > valid.right)*/ r.left = valid.right; + } + else if(IsRectInRectV(r, valid) && (r.top >= valid.top || r.bottom <= valid.bottom)) + { + r.left = valid.left; + r.right = valid.right; + if(r.top < valid.top) r.bottom = valid.top; + else /*if(r.bottom > valid.bottom)*/ r.top = valid.bottom; + } + else + { + r |= valid; + } + + return !r.IsRectEmpty(); +} diff --git a/gsdx10/GSTexture2D.cpp b/gsdx10/GSTexture2D.cpp new file mode 100644 index 0000000..81027d5 --- /dev/null +++ b/gsdx10/GSTexture2D.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTexture2D.h" + +/* Default constructor: no texture attached; the cached description is zero-filled. */ +GSTexture2D::GSTexture2D() +{ + memset(&m_desc, 0, sizeof(m_desc)); +} + +/* Wraps an existing texture and caches its owning device and its description. */ +GSTexture2D::GSTexture2D(ID3D10Texture2D* texture) + : m_texture(texture) +{ + ASSERT(m_texture); + + m_texture->GetDevice(&m_dev); + m_texture->GetDesc(&m_desc); +} + +GSTexture2D::~GSTexture2D() +{ +} + +/* True when a texture is attached. */ +GSTexture2D::operator bool() +{ + return !!m_texture; +} + +/* The three Is* queries test the bind flags of the cached description. */ +bool GSTexture2D::IsShaderResource() const +{ + return !!(m_desc.BindFlags & D3D10_BIND_SHADER_RESOURCE); +} + +bool GSTexture2D::IsRenderTarget() const +{ + return !!(m_desc.BindFlags & D3D10_BIND_RENDER_TARGET); +} + +bool GSTexture2D::IsDepthStencil() const +{ + return !!(m_desc.BindFlags & D3D10_BIND_DEPTH_STENCIL); +} + +/* Access to the wrapped texture pointer. */ +ID3D10Texture2D* GSTexture2D::operator->() +{ + return m_texture; +} + +GSTexture2D::operator ID3D10Texture2D*() +{ + return m_texture; +} + +/* View conversions: each view is created on first use, only when both device and texture are set, and then kept in the corresponding member. NOTE(review): the result of the Create*View call is ignored, so a failed creation presumably yields a NULL view - confirm callers handle that. */ +GSTexture2D::operator ID3D10ShaderResourceView*() +{ + if(!m_srv && m_dev && m_texture) + { + m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv); + } + + return m_srv; +} + +GSTexture2D::operator ID3D10RenderTargetView*() +{ + ASSERT(m_dev); + + if(!m_rtv && m_dev && m_texture) + { + m_dev->CreateRenderTargetView(m_texture, NULL, &m_rtv); + } + + return m_rtv; +} + +GSTexture2D::operator ID3D10DepthStencilView*() +{ + if(!m_dsv && m_dev && m_texture) + { + m_dev->CreateDepthStencilView(m_texture, NULL, &m_dsv); + } + + return m_dsv; +} diff --git a/gsdx10/GSTexture2D.h b/gsdx10/GSTexture2D.h new file mode 100644 index 0000000..1551c95 --- /dev/null +++ b/gsdx10/GSTexture2D.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +/* Wrapper around an ID3D10Texture2D that also keeps the owning device, the texture description and one shader-resource, render-target and depth-stencil view; the views are private and handed out through the conversion operators. NOTE(review): the CComPtr template arguments are missing from this text. */ +class GSTexture2D +{ + CComPtr m_srv; + CComPtr m_rtv; + CComPtr m_dsv; + +public: + CComPtr m_dev; + CComPtr m_texture; + D3D10_TEXTURE2D_DESC m_desc; + + GSTexture2D(); + explicit GSTexture2D(ID3D10Texture2D* texture); + virtual ~GSTexture2D(); + + operator bool(); + + bool IsShaderResource() const; + bool IsRenderTarget() const; + bool IsDepthStencil() const; + + ID3D10Texture2D* operator->(); + + operator ID3D10Texture2D*(); + operator ID3D10ShaderResourceView*(); + operator ID3D10RenderTargetView*(); + operator ID3D10DepthStencilView*(); +}; diff --git a/gsdx10/GSTextureCache.cpp b/gsdx10/GSTextureCache.cpp new file mode 100644 index 0000000..6b1a62d --- /dev/null +++ b/gsdx10/GSTextureCache.cpp @@ -0,0 +1,578 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. 
If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GSTextureCache.h" +#include "GSRendererHW.h" +#include "resource.h" + +/* Constructor: stores the renderer and reads the nativeres flag from the Settings profile section (default FALSE). */ +GSTextureCache::GSTextureCache(GSRendererHW* renderer) + : m_renderer(renderer) +{ + m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE); +} + +GSTextureCache::~GSTextureCache() +{ + RemoveAll(); +} + +/* RemoveAll: deletes every cached render target, depth stencil and texture. */ +void GSTextureCache::RemoveAll() +{ + while(m_rt.GetCount()) delete m_rt.RemoveHead(); + while(m_ds.GetCount()) delete m_ds.RemoveHead(); + while(m_tex.GetCount()) delete m_tex.RemoveHead(); +} + +/* GetRenderTarget: returns the cached render target whose TBP0 equals TEX0.TBP0 (moved to the list head and updated), or creates a new w x h one; returns NULL when creation fails. Cached textures for which HasSharedBits matches the target are deleted first. When fb is set, the TEX0 of a found target is left unchanged, and on a miss the target with the highest base pointer in the range (TEX0.TBP0 - 0xe00, TEX0.TBP0] is used instead of creating a new one. */ +GSTextureCache::GSRenderTarget* GSTextureCache::GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb) +{ + POSITION pos = m_tex.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSTexture* t = m_tex.GetNext(pos); + + if(HasSharedBits(TEX0.TBP0, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + m_tex.RemoveAt(cur); + + delete t; + } + } + + GSRenderTarget* rt = NULL; + + if(rt == NULL) + { + for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos)) + { + GSRenderTarget* rt2 = m_rt.GetAt(pos); + + if(rt2->m_TEX0.TBP0 == TEX0.TBP0) + { + m_rt.MoveToHead(pos); + + rt = rt2; + + if(!fb) rt->m_TEX0 = TEX0; + + rt->Update(); + + break; + } + } + } + + if(rt == NULL && fb) + { + // HACK: try to find something close to the base pointer + + for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos)) + { + GSRenderTarget* rt2 = m_rt.GetAt(pos); + + if(rt2->m_TEX0.TBP0 <= TEX0.TBP0 && TEX0.TBP0 < rt2->m_TEX0.TBP0 + 0xe00 && (!rt || rt2->m_TEX0.TBP0 >= rt->m_TEX0.TBP0)) + { + rt = rt2; + } + } + + if(rt) + { + rt->Update(); + } + } + + if(rt == NULL) + { + rt = new GSRenderTarget(this); + + rt->m_TEX0 = TEX0; + + if(!rt->Create(w, h)) + { + delete rt; + + return NULL; + } + + m_rt.AddHead(rt); + } + + if(!m_nativeres) + { + rt->m_scale.x = (float)w / 
(m_renderer->GetFramePos().cx + rt->m_TEX0.TBW * 64); + rt->m_scale.y = (float)h / (m_renderer->GetFramePos().cy + m_renderer->GetDisplaySize().cy); + } + + if(!fb) + { + rt->m_used = true; + } + + return rt; +} + +/* GetDepthStencil: returns the cached depth stencil whose TBP0 equals TEX0.TBP0 (moved to the list head, TEX0 replaced, updated), or creates a new w x h one; returns NULL when creation fails. Cached textures for which HasSharedBits matches are deleted first. m_used is set when the current context's ZBUF.ZMSK is zero. */ +GSTextureCache::GSDepthStencil* GSTextureCache::GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h) +{ + POSITION pos = m_tex.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSTexture* t = m_tex.GetNext(pos); + + if(HasSharedBits(TEX0.TBP0, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + m_tex.RemoveAt(cur); + + delete t; + } + } + + GSDepthStencil* ds = NULL; + + if(ds == NULL) + { + for(POSITION pos = m_ds.GetHeadPosition(); pos; m_ds.GetNext(pos)) + { + GSDepthStencil* ds2 = m_ds.GetAt(pos); + + if(ds2->m_TEX0.TBP0 == TEX0.TBP0) + { + m_ds.MoveToHead(pos); + + ds = ds2; + + ds->m_TEX0 = TEX0; + + ds->Update(); + + break; + } + } + } + + if(ds == NULL) + { + ds = new GSDepthStencil(this); + + ds->m_TEX0 = TEX0; + + if(!ds->Create(w, h)) + { + delete ds; + + return NULL; + } + + m_ds.AddHead(ds); + } + + if(!m_renderer->m_context->ZBUF.ZMSK) + { + ds->m_used = true; + } + + return ds; +} + +/* GetTexture: looks up or creates the cached texture for the current context's TEX0 and CLAMP. For paletted formats (pal > 0) the CLUT is set up and copied into the local clut array first; the large block that follows is commented-out code. */ +GSTextureCache::GSTexture* GSTextureCache::GetTexture() +{ + const GIFRegTEX0& TEX0 = m_renderer->m_context->TEX0; + const GIFRegCLAMP& CLAMP = m_renderer->m_context->CLAMP; + + DWORD clut[256]; + + int pal = GSLocalMemory::m_psm[TEX0.PSM].pal; + + if(pal > 0) + { + m_renderer->m_mem.SetupCLUT(TEX0); + m_renderer->m_mem.CopyCLUT32(clut, pal); +/* + POSITION pos = m_tex.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSSurface* s = m_tex.GetNext(pos); + + if(s->m_TEX0.TBP0 == TEX0.CBP) + { + m_tex.RemoveAt(cur); + + delete s; + } + } + + pos = m_rt.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSSurface* s = m_rt.GetNext(pos); + + if(s->m_TEX0.TBP0 == TEX0.CBP) + { + m_rt.RemoveAt(cur); + + delete s; + } + } + + pos = m_ds.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSSurface* s = 
m_ds.GetNext(pos); + + if(s->m_TEX0.TBP0 == TEX0.CBP) + { + m_ds.RemoveAt(cur); + + delete s; + } + }*/ + } + + GSTexture* t = NULL; + + for(POSITION pos = m_tex.GetHeadPosition(); pos; m_tex.GetNext(pos)) + { + t = m_tex.GetAt(pos); + + if(HasSharedBits(t->m_TEX0.TBP0, t->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM)) + { + if(TEX0.PSM == t->m_TEX0.PSM && TEX0.TBW == t->m_TEX0.TBW + && TEX0.TW == t->m_TEX0.TW && TEX0.TH == t->m_TEX0.TH + && (CLAMP.WMS != 3 && t->m_CLAMP.WMS != 3 && CLAMP.WMT != 3 && t->m_CLAMP.WMT != 3 || CLAMP.i64 == t->m_CLAMP.i64) + && (pal == 0 || TEX0.CPSM == t->m_TEX0.CPSM && !memcmp(t->m_clut, clut, pal * sizeof(clut[0])))) + { + m_tex.MoveToHead(pos); + + break; + } + } + + t = NULL; + } + + if(t == NULL) + { + for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos)) + { + GSRenderTarget* rt = m_rt.GetAt(pos); + + if(rt->m_dirty.IsEmpty() && HasSharedBits(rt->m_TEX0.TBP0, rt->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM)) + { + t = new GSTexture(this); + + if(!t->Create(rt)) + { + delete t; + + return NULL; + } + + m_tex.AddHead(t); + + break; + } + } + } + + if(t == NULL) + { + for(POSITION pos = m_ds.GetHeadPosition(); pos; m_ds.GetNext(pos)) + { + GSDepthStencil* ds = m_ds.GetAt(pos); + + if(ds->m_dirty.IsEmpty() && ds->m_used && HasSharedBits(ds->m_TEX0.TBP0, ds->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM)) + { + t = new GSTexture(this); + + if(!t->Create(ds)) + { + delete t; + + return NULL; + } + + m_tex.AddHead(t); + + break; + } + } + } + + if(t == NULL) + { + t = new GSTexture(this); + + if(!t->Create()) + { + delete t; + + return NULL; + } + + m_tex.AddHead(t); + } + + if(pal > 0) + { + int size = pal * sizeof(clut[0]); + + if(t->m_palette) + { + // TODO: sse2 + + DWORD sum = 0; + + for(int i = 0; i < pal; i++) + { + sum |= t->m_clut[i] ^ clut[i]; + + t->m_clut[i] = clut[i]; + } + + if(sum != 0) + { + D3D10_BOX box = {0, 0, 0, pal, 1, 1}; + + m_renderer->m_dev->UpdateSubresource(t->m_palette, 0, &box, t->m_clut, size, 0); + + // 
m_renderer->m_perfmon.Put(GSPerfMon::Texture, size); + } + } + else + { + memcpy(t->m_clut, clut, size); + } + } + + t->Update(&GSLocalMemory::ReadTextureNP); + + return t; +} + +void GSTextureCache::InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r) +{ + POSITION pos = m_tex.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSTexture* t = m_tex.GetNext(pos); + + if(HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + if(BITBLTBUF.DBW == t->m_TEX0.TBW) + { + t->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, r)); + } + else + { + m_tex.RemoveAt(cur); + + delete t; + } + } + } + + pos = m_rt.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSRenderTarget* rt = m_rt.GetNext(pos); + + if(HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, rt->m_TEX0.TBP0, rt->m_TEX0.PSM)) + { + if(BITBLTBUF.DPSM == PSM_PSMCT32 + || BITBLTBUF.DPSM == PSM_PSMCT24 + || BITBLTBUF.DPSM == PSM_PSMCT16 + || BITBLTBUF.DPSM == PSM_PSMCT16S + || BITBLTBUF.DPSM == PSM_PSMZ32 + || BITBLTBUF.DPSM == PSM_PSMZ24 + || BITBLTBUF.DPSM == PSM_PSMZ16 + || BITBLTBUF.DPSM == PSM_PSMZ16S) + { + rt->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, r)); + rt->m_TEX0.TBW = BITBLTBUF.DBW; + } + else + { + m_rt.RemoveAt(cur); + + delete rt; + + continue; + } + } + + if(HasSharedBits(BITBLTBUF.DPSM, rt->m_TEX0.PSM) && BITBLTBUF.DBP < rt->m_TEX0.TBP0) + { + DWORD rowsize = BITBLTBUF.DBW * 8192; + DWORD offset = (rt->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256; + + if(rowsize > 0 && offset % rowsize == 0) + { + int y = m_renderer->m_mem.m_psm[BITBLTBUF.DPSM].pgs.cy * offset / rowsize; + + if(r.top >= y) + { + // TODO: do not add this rect above too + rt->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, CRect(r.left, r.top - y, r.right, r.bottom - y))); + rt->m_TEX0.TBW = BITBLTBUF.DBW; + continue; + } + } + } + } + + // copypaste for ds + + pos = m_ds.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + GSDepthStencil* ds = m_ds.GetNext(pos); + + 
if(HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, ds->m_TEX0.TBP0, ds->m_TEX0.PSM)) + { + if(BITBLTBUF.DPSM == PSM_PSMCT32 + || BITBLTBUF.DPSM == PSM_PSMCT24 + || BITBLTBUF.DPSM == PSM_PSMCT16 + || BITBLTBUF.DPSM == PSM_PSMCT16S + || BITBLTBUF.DPSM == PSM_PSMZ32 + || BITBLTBUF.DPSM == PSM_PSMZ24 + || BITBLTBUF.DPSM == PSM_PSMZ16 + || BITBLTBUF.DPSM == PSM_PSMZ16S) + { + ds->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, r)); + ds->m_TEX0.TBW = BITBLTBUF.DBW; + } + else + { + m_ds.RemoveAt(cur); + + delete ds; + + continue; + } + } + + if(HasSharedBits(BITBLTBUF.DPSM, ds->m_TEX0.PSM) && BITBLTBUF.DBP < ds->m_TEX0.TBP0) + { + DWORD rowsize = BITBLTBUF.DBW * 8192; + DWORD offset = (ds->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256; + + if(rowsize > 0 && offset % rowsize == 0) + { + int y = m_renderer->m_mem.m_psm[BITBLTBUF.DPSM].pgs.cy * offset / rowsize; + + if(r.top >= y) + { + // TODO: do not add this rect above too + ds->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, CRect(r.left, r.top - y, r.right, r.bottom - y))); + ds->m_TEX0.TBW = BITBLTBUF.DBW; + continue; + } + } + } + } +} + +void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r) +{ + POSITION pos = m_rt.GetHeadPosition(); + + while(pos) + { + GSRenderTarget* rt = m_rt.GetNext(pos); + + if(HasSharedBits(BITBLTBUF.SBP, BITBLTBUF.SPSM, rt->m_TEX0.TBP0, rt->m_TEX0.PSM)) + { + rt->Read(r); + break; + } + } +} + +void GSTextureCache::IncAge() +{ + RecycleByAge(m_tex, 2); + RecycleByAge(m_rt); + RecycleByAge(m_ds); +} + +template void GSTextureCache::RecycleByAge(CAtlList& l, int maxage) +{ + POSITION pos = l.GetHeadPosition(); + + while(pos) + { + POSITION cur = pos; + + T* t = l.GetNext(pos); + + if(++t->m_age >= maxage) + { + l.RemoveAt(cur); + + delete t; + } + } +} + +// + +GSTextureCache::GSSurface::GSSurface(GSTextureCache* tc) + : m_tc(tc) + , m_scale(1, 1) + , m_age(0) +{ + m_TEX0.TBP0 = ~0; +} + +GSTextureCache::GSSurface::~GSSurface() +{ + m_tc->m_renderer->m_dev.Recycle(m_texture); + 
m_tc->m_renderer->m_dev.Recycle(m_palette); +} + +void GSTextureCache::GSSurface::Update() +{ + m_age = 0; +} diff --git a/gsdx10/GSTextureCache.h b/gsdx10/GSTextureCache.h new file mode 100644 index 0000000..64ae419 --- /dev/null +++ b/gsdx10/GSTextureCache.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSTexture2D.h" + +class GSRendererHW; + +class GSTextureCache +{ +public: + class GSSurface + { + protected: + GSTextureCache* m_tc; + + public: + GSTexture2D m_texture; + GSTexture2D m_palette; + GSScale m_scale; + int m_age; + GSDirtyRectList m_dirty; + GIFRegTEX0 m_TEX0; + + explicit GSSurface(GSTextureCache* tc); + virtual ~GSSurface(); + + void Update(); + }; + + class GSRenderTarget : public GSSurface + { + public: + bool m_used; + + explicit GSRenderTarget(GSTextureCache* tc); + + bool Create(int w, int h); + void Update(); + void Read(CRect r); + }; + + class GSDepthStencil : public GSSurface + { + public: + bool m_used; + + explicit GSDepthStencil(GSTextureCache* tc); + + bool Create(int w, int h); + void Update(); + }; + + class GSTexture : public GSSurface + { + bool GetDirtyRect(CRect& r); + + public: + GIFRegCLAMP m_CLAMP; + DWORD m_clut[256]; // * + CRect m_valid; + int m_bpp; + int m_bpp2; + bool m_rendered; + + explicit GSTexture(GSTextureCache* tc); + + bool Create(); + bool Create(GSRenderTarget* rt); + bool Create(GSDepthStencil* ds); + void Update(GSLocalMemory::readTexture rt); + }; + +protected: + GSRendererHW* m_renderer; + CAtlList m_rt; + CAtlList m_ds; + CAtlList m_tex; + bool m_nativeres; + + template void RecycleByAge(CAtlList& l, int maxage = 10); + +public: + GSTextureCache(GSRendererHW* renderer); + virtual ~GSTextureCache(); + + void RemoveAll(); + + GSRenderTarget* GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb = false); + GSDepthStencil* GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h); + GSTexture* GetTexture(); + + void InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r); + void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r); + + void IncAge(); +}; diff --git a/gsdx10/GSTextureFX.cpp b/gsdx10/GSTextureFX.cpp new file mode 100644 index 0000000..b6d30d4 --- /dev/null +++ b/gsdx10/GSTextureFX.cpp 
@@ -0,0 +1,473 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTextureFX.h" +#include "resource.h" + +GSTextureFX::GSTextureFX() + : m_dev(NULL) +{ + memset(m_vb_max, 0, sizeof(m_vb_max)); + m_vb_cur = 0; + memset(&m_vs_cb_cache, 0, sizeof(m_vs_cb_cache)); + memset(&m_ps_cb_cache, 0, sizeof(m_ps_cb_cache)); +} + +bool GSTextureFX::Create(GSDevice* dev) +{ + m_dev = dev; + + // shaders + + HRESULT hr; + + D3D10_INPUT_ELEMENT_DESC il[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 20, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 24, D3D10_INPUT_PER_VERTEX_DATA, 0}, + }; + + hr = m_dev->CompileShader(&m_vs, IDR_TFX_FX, "vs_main", il, countof(il), &m_il); + + if(FAILED(hr)) return false; + + // buffers + + D3D10_BUFFER_DESC bd; + + memset(&bd, 0, sizeof(bd)); + + bd.ByteWidth = sizeof(VSConstantBuffer); + bd.Usage = D3D10_USAGE_DEFAULT; + bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER; + + hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_vs_cb); + + if(FAILED(hr)) return 
false; + + memset(&bd, 0, sizeof(bd)); + + bd.ByteWidth = sizeof(PSConstantBuffer); + bd.Usage = D3D10_USAGE_DEFAULT; + bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER; + + hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_ps_cb); + + if(FAILED(hr)) return false; + + return true; +} + +bool GSTextureFX::SetupIA(const GSVertexHW* vertices, UINT count, D3D10_PRIMITIVE_TOPOLOGY prim) +{ + HRESULT hr; + + int i = m_vb_cur; + + m_vb_cur = (m_vb_cur + 1) % countof(m_vb); + + if(m_vb[i]) + { + if(m_vb_max[i] < max(count, 100000)) + { + (*m_dev)->Flush(); + + m_vb[i] = NULL; + } + } + + if(!m_vb[i]) + { + m_vb_max[i] = max(count, 100000); + + D3D10_BUFFER_DESC bd; + + memset(&bd, 0, sizeof(bd)); + + bd.Usage = D3D10_USAGE_DEFAULT; + bd.ByteWidth = m_vb_max[i] * sizeof(GSVertexHW); + bd.BindFlags = D3D10_BIND_VERTEX_BUFFER; + bd.CPUAccessFlags = 0; + bd.MiscFlags = 0; + + hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_vb[i]); + + if(FAILED(hr)) return false; + } + + m_dev->IASet(m_vb[i], count, vertices, m_il, prim); + + return true; +} + +bool GSTextureFX::SetupVS(const VSConstantBuffer* cb) +{ + if(memcmp(&m_vs_cb_cache, cb, sizeof(*cb))) + { + (*m_dev)->UpdateSubresource(m_vs_cb, 0, NULL, cb, 0, 0); + + memcpy(&m_vs_cb_cache, cb, sizeof(*cb)); + } + + m_dev->VSSet(m_vs, m_vs_cb); + + return true; +} + +bool GSTextureFX::SetupGS(GSSelector sel) +{ + HRESULT hr; + + CComPtr gs; + + if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) // geometry shader works in every case, but not needed + { + if(!(gs = m_gs.Lookup(sel))) + { + CStringA str[2]; + + str[0].Format("%d", sel.iip); + str[1].Format("%d", sel.prim); + + D3D10_SHADER_MACRO macro[] = + { + {"IIP", str[0]}, + {"PRIM", str[1]}, + {NULL, NULL}, + }; + + hr = m_dev->CompileShader(&gs, IDR_TFX_FX, "gs_main", macro); + + ASSERT(SUCCEEDED(hr)); + + m_gs.Add(sel, gs); + } + } + + m_dev->GSSet(gs); + + return true; +} + +bool GSTextureFX::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, ID3D10ShaderResourceView* 
srv, ID3D10ShaderResourceView* pal) +{ + if(memcmp(&m_ps_cb_cache, cb, sizeof(*cb))) + { + (*m_dev)->UpdateSubresource(m_ps_cb, 0, NULL, cb, 0, 0); + + memcpy(&m_ps_cb_cache, cb, sizeof(*cb)); + } + + (*m_dev)->PSSetConstantBuffers(0, 1, &m_ps_cb.p); + + m_dev->PSSetShaderResources(srv, pal); + + UpdatePS(sel, ssel); + + return true; +} + +void GSTextureFX::UpdatePS(PSSelector sel, PSSamplerSelector ssel) +{ + HRESULT hr; + + CComPtr ps; + + if(!(ps = m_ps.Lookup(sel))) + { + CStringA str[12]; + + str[0].Format("%d", sel.fst); + str[1].Format("%d", sel.clamp); + str[2].Format("%d", sel.bpp); + str[3].Format("%d", sel.aem); + str[4].Format("%d", sel.tfx); + str[5].Format("%d", sel.tcc); + str[6].Format("%d", sel.ate); + str[7].Format("%d", sel.atst); + str[8].Format("%d", sel.fog); + str[9].Format("%d", sel.clr1); + str[10].Format("%d", sel.fba); + str[11].Format("%d", sel.aout); + + D3D10_SHADER_MACRO macro[] = + { + {"FST", str[0]}, + {"CLAMP", str[1]}, + {"BPP", str[2]}, + {"AEM", str[3]}, + {"TFX", str[4]}, + {"TCC", str[5]}, + {"ATE", str[6]}, + {"ATST", str[7]}, + {"FOG", str[8]}, + {"CLR1", str[9]}, + {"FBA", str[10]}, + {"AOUT", str[11]}, + {NULL, NULL}, + }; + + hr = m_dev->CompileShader(&ps, IDR_TFX_FX, "ps_main", macro); + + ASSERT(SUCCEEDED(hr)); + + m_ps.Add(sel, ps); + } + + CComPtr ss; + + if(sel.tfx != 4) + { + if(sel.bpp >= 3) ssel.min = ssel.mag = 0; + + if(!(ss = m_ps_ss.Lookup(ssel))) + { + D3D10_SAMPLER_DESC sd; + + memset(&sd, 0, sizeof(sd)); + + sd.AddressU = ssel.tau ? D3D10_TEXTURE_ADDRESS_WRAP : D3D10_TEXTURE_ADDRESS_CLAMP; + sd.AddressV = ssel.tav ? D3D10_TEXTURE_ADDRESS_WRAP : D3D10_TEXTURE_ADDRESS_CLAMP; + sd.AddressW = D3D10_TEXTURE_ADDRESS_CLAMP; + + sd.Filter = D3D10_ENCODE_BASIC_FILTER( + (ssel.min ? D3D10_FILTER_TYPE_LINEAR : D3D10_FILTER_TYPE_POINT), + (ssel.mag ? 
D3D10_FILTER_TYPE_LINEAR : D3D10_FILTER_TYPE_POINT), + D3D10_FILTER_TYPE_POINT, + false); + + sd.MaxLOD = FLT_MAX; + sd.MaxAnisotropy = 16; + sd.ComparisonFunc = D3D10_COMPARISON_NEVER; + + hr = (*m_dev)->CreateSamplerState(&sd, &ss); + + m_ps_ss.Add(ssel, ss); + } + } + + m_dev->PSSet(ps, ss); +} + +void GSTextureFX::SetupRS(UINT w, UINT h, const RECT& scissor) +{ + m_dev->RSSet(w, h, &scissor); +} + +void GSTextureFX::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv) +{ + UpdateOM(dssel, bsel, bf); + + m_dev->OMSetRenderTargets(rtv, dsv); +} + +void GSTextureFX::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf) +{ + HRESULT hr; + + CComPtr dss; + + if(!(dss = m_om_dss.Lookup(dssel))) + { + D3D10_DEPTH_STENCIL_DESC dsd; + + memset(&dsd, 0, sizeof(dsd)); + + if(dssel.date) + { + dsd.StencilEnable = true; + dsd.StencilReadMask = 1; + dsd.StencilWriteMask = 1; + dsd.FrontFace.StencilFunc = D3D10_COMPARISON_EQUAL; + dsd.FrontFace.StencilPassOp = D3D10_STENCIL_OP_KEEP; + dsd.FrontFace.StencilFailOp = D3D10_STENCIL_OP_KEEP; + dsd.FrontFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP; + dsd.BackFace.StencilFunc = D3D10_COMPARISON_EQUAL; + dsd.BackFace.StencilPassOp = D3D10_STENCIL_OP_KEEP; + dsd.BackFace.StencilFailOp = D3D10_STENCIL_OP_KEEP; + dsd.BackFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP; + } + + if(!(dssel.zte && dssel.ztst == 1 && !dssel.zwe)) + { + static const D3D10_COMPARISON_FUNC ztst[] = + { + D3D10_COMPARISON_NEVER, + D3D10_COMPARISON_ALWAYS, + D3D10_COMPARISON_GREATER_EQUAL, + D3D10_COMPARISON_GREATER + }; + + dsd.DepthEnable = dssel.zte; + dsd.DepthWriteMask = dssel.zwe ? 
D3D10_DEPTH_WRITE_MASK_ALL : D3D10_DEPTH_WRITE_MASK_ZERO; + dsd.DepthFunc = ztst[dssel.ztst]; + } + + hr = (*m_dev)->CreateDepthStencilState(&dsd, &dss); + + m_om_dss.Add(dssel, dss); + } + + CComPtr bs; + + if(!(bs = m_om_bs.Lookup(bsel))) + { + D3D10_BLEND_DESC bd; + + memset(&bd, 0, sizeof(bd)); + + bd.BlendEnable[0] = bsel.abe; + + if(bsel.abe) + { + // (A:Cs/Cd/0 - B:Cs/Cd/0) * C:As/Ad/FIX + D:Cs/Cd/0 + + static const struct {int bogus; D3D10_BLEND_OP op; D3D10_BLEND src, dst;} map[3*3*3*3] = + { + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 0000: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 0001: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 0002: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 0010: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 0011: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 0012: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 0020: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 0021: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 0022: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {1, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA}, // * 0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_INV_SRC1_ALPHA}, // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA}, // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As + {1, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA}, // * 0110: (Cs - Cd)*Ad + Cs 
==> Cs*(Ad + 1) - Cd*Ad + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_INV_DEST_ALPHA}, // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA}, // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad + {1, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR}, // * 0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_INV_BLEND_FACTOR}, // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR}, // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F + {1, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ZERO}, // * 0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ONE}, // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ZERO}, // 0202: (Cs - 0)*As + 0 ==> Cs*As + {1, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ZERO}, // * 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ONE}, // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ZERO}, // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad + {1, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ZERO}, // * 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ONE}, // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ZERO}, // 0222: (Cs - 0)*F + 0 ==> Cs*F + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA}, // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) + {1, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA}, // * 1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA}, // 
1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA}, // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) + {1, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA}, // * 1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA}, // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR}, // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) + {1, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},// * 1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},// 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 1100: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 1101: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 1102: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 1110: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 1111: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 1112: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 1120: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 1121: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 1122: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_SRC1_ALPHA}, // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As + {2, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_COLOR, 
D3D10_BLEND_SRC1_ALPHA}, // ** 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background glow effect + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_SRC1_ALPHA}, // 1202: (Cd - 0)*As + 0 ==> Cd*As + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_DEST_ALPHA}, // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad + {2, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_COLOR, D3D10_BLEND_DEST_ALPHA}, // ** 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_DEST_ALPHA}, // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_BLEND_FACTOR}, // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F + {2, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_COLOR, D3D10_BLEND_BLEND_FACTOR}, // ** 1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_BLEND_FACTOR}, // 1222: (Cd - 0)*F + 0 ==> Cd*F + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_SRC1_ALPHA, D3D10_BLEND_ZERO}, // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ONE}, // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ZERO}, // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_DEST_ALPHA, D3D10_BLEND_ZERO}, // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ONE}, // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ZERO}, // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_BLEND_FACTOR, D3D10_BLEND_ZERO}, // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ONE}, // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F + {0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ZERO}, // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ONE, 
D3D10_BLEND_SRC1_ALPHA}, // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_INV_SRC1_ALPHA}, // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ZERO, D3D10_BLEND_SRC1_ALPHA}, // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ONE, D3D10_BLEND_DEST_ALPHA}, // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_INV_DEST_ALPHA}, // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ZERO, D3D10_BLEND_DEST_ALPHA}, // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ONE, D3D10_BLEND_BLEND_FACTOR}, // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_INV_BLEND_FACTOR}, // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) + {0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ZERO, D3D10_BLEND_BLEND_FACTOR}, // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 2200: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 2201: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 2202: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 2210: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 2211: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 2212: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO}, // 2220: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE}, // 2221: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd + {0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO}, // 2222: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0 + }; + + // bogus: 0100, 
0110, 0120, 0200, 0210, 0220, 1001, 1011, 1021 + + // tricky: 1201, 1211, 1221 + // + // Source.rgb = float3(1, 1, 1); + // 1201 Cd*(1 + As) => Source * Dest color + Dest * Source1 alpha + // 1211 Cd*(1 + Ad) => Source * Dest color + Dest * Dest alpha + // 1221 Cd*(1 + F) => Source * Dest color + Dest * Factor + + int i = ((min(bsel.a, 2) * 3 + min(bsel.b, 2)) * 3 + min(bsel.c, 2)) * 3 + min(bsel.d, 2); // selectors are 2-bit fields: value 3 is clamped to 2 so the lookup stays inside map[] when ASSERT is compiled out (NOTE(review): treating 3 like 2 is a defensive choice - confirm against hardware behaviour) + + ASSERT(bsel.a != 3); + ASSERT(bsel.b != 3); + ASSERT(bsel.c != 3); + ASSERT(bsel.d != 3); + + bd.BlendOp = map[i].op; + bd.SrcBlend = map[i].src; + bd.DestBlend = map[i].dst; + bd.BlendOpAlpha = D3D10_BLEND_OP_ADD; + bd.SrcBlendAlpha = D3D10_BLEND_ONE; + bd.DestBlendAlpha = D3D10_BLEND_ZERO; + + if(map[i].bogus == 1) + { + ASSERT(0); + + (bsel.a == 0 ? bd.SrcBlend : bd.DestBlend) = D3D10_BLEND_ONE; + } + } + + if(bsel.wr) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_RED; + if(bsel.wg) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_GREEN; + if(bsel.wb) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_BLUE; + if(bsel.wa) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_ALPHA; + + hr = (*m_dev)->CreateBlendState(&bd, &bs); + + m_om_bs.Add(bsel, bs); + } + + m_dev->OMSet(dss, 1, bs, bf); +} diff --git a/gsdx10/GSTextureFX.h b/gsdx10/GSTextureFX.h new file mode 100644 index 0000000..602d27e --- /dev/null +++ b/gsdx10/GSTextureFX.h @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSVertexHW.h" +#include "GSDevice.h" + +class GSTextureFX +{ +public: + #pragma pack(push, 1) + + struct VSConstantBuffer + { + D3DXVECTOR4 VertexScale; + D3DXVECTOR4 VertexOffset; + D3DXVECTOR2 TextureScale; + float _pad[2]; + }; + + union GSSelector + { + struct + { + DWORD iip:1; + DWORD prim:2; + }; + + DWORD dw; + + operator DWORD() {return dw & 0x7;} + }; + + struct PSConstantBuffer + { + D3DXVECTOR4 FogColor; + D3DXVECTOR2 ClampMin; + D3DXVECTOR2 ClampMax; + float TA0; + float TA1; + float AREF; + float _pad[1]; + D3DXVECTOR2 WH; + D3DXVECTOR2 rWrH; + D3DXVECTOR2 rWZ; + D3DXVECTOR2 ZrH; + }; + + union PSSelector + { + struct + { + DWORD fst:1; + DWORD clamp:1; + DWORD bpp:3; + DWORD aem:1; + DWORD tfx:3; + DWORD tcc:1; + DWORD ate:1; + DWORD atst:3; + DWORD fog:1; + DWORD clr1:1; + DWORD fba:1; + DWORD aout:1; + }; + + DWORD dw; + + operator DWORD() {return dw & 0x3ffff;} + }; + + union PSSamplerSelector + { + struct + { + DWORD tau:1; + DWORD tav:1; + DWORD min:1; + DWORD mag:1; + }; + + DWORD dw; + + operator DWORD() {return dw & 0xf;} + }; + + union OMDepthStencilSelector + { + struct + { + DWORD zte:1; + DWORD ztst:2; + DWORD zwe:1; + DWORD date:1; + }; + + DWORD dw; + + operator DWORD() {return dw & 0x1f;} + }; + + union OMBlendSelector + { + struct + { + DWORD abe:1; + DWORD a:2; + DWORD b:2; + DWORD c:2; + DWORD d:2; + DWORD wr:1; + DWORD wg:1; + DWORD wb:1; + DWORD wa:1; + }; + + DWORD dw; + + operator DWORD() {return dw & 0x1fff;} + }; + + #pragma pack(pop) + +private: + GSDevice* m_dev; + CComPtr m_il; + CComPtr m_vs; + CComPtr m_vs_cb; + CSimpleMap > m_gs; + CSimpleMap > m_ps; + CComPtr m_ps_cb; + CSimpleMap > m_ps_ss; + CSimpleMap > m_om_dss; + CSimpleMap > 
m_om_bs; + + CComPtr m_vb[1]; + int m_vb_max[1]; + int m_vb_cur; + + VSConstantBuffer m_vs_cb_cache; + PSConstantBuffer m_ps_cb_cache; + +public: + GSTextureFX(); + + bool Create(GSDevice* dev); + + bool SetupIA(const GSVertexHW* vertices, UINT count, D3D10_PRIMITIVE_TOPOLOGY prim); + bool SetupVS(const VSConstantBuffer* cb); + bool SetupGS(GSSelector sel); + bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, ID3D10ShaderResourceView* srv, ID3D10ShaderResourceView* pal); + void UpdatePS(PSSelector sel, PSSamplerSelector ssel); + void SetupRS(UINT w, UINT h, const RECT& scissor); + void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv); + void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf); +}; diff --git a/gsdx10/GSVertexHW.h b/gsdx10/GSVertexHW.h new file mode 100644 index 0000000..4f9f455 --- /dev/null +++ b/gsdx10/GSVertexHW.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +__declspec(align(16)) union GSVertexHW +{ + struct + { + float x, y, z, w; + union {struct {BYTE r, g, b, a;}; DWORD c;}; + DWORD f; + float u, v; + }; + + struct {__m128i m128i[2];}; + struct {__m128 m128[2];}; + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + GSVertexHW& operator = (GSVertexHW& v) {m128i[0] = v.m128i[0]; m128i[1] = v.m128i[1]; return *this;} +#endif +}; diff --git a/gsdx10/GSVertexSW.h b/gsdx10/GSVertexSW.h new file mode 100644 index 0000000..5ea65d2 --- /dev/null +++ b/gsdx10/GSVertexSW.h @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +// +// GSVertexSWFP +// + +__declspec(align(16)) union GSVertexSWFP +{ + class __declspec(novtable) Scalar + { + float val; + + public: + Scalar() {} + explicit Scalar(float f) {val = f;} + explicit Scalar(int i) {val = (float)i;} + + float GetValue() const {return val;} + void SetValue(int i) {val = (float)i;} + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + void sat() {_mm_store_ss(&val, _mm_min_ss(_mm_max_ss(_mm_set_ss(val), _mm_setzero_ps()), _mm_set_ss(255)));} + void rcp() {_mm_store_ss(&val, _mm_rcp_ss(_mm_set_ss(val)));} +#else + void sat() {val = val < 0 ? 0 : val > 255 ? 255 : val;} + void rcp() {val = 1.0f / val;} +#endif + void abs() {val = fabs(val);} + + Scalar floor_s() const {return Scalar(floor(val));} + int floor_i() const {return (int)floor(val);} + + Scalar ceil_s() const {return Scalar(-floor(-val));} + int ceil_i() const {return -(int)floor(-val);} + + void operator = (float f) {val = f;} + void operator = (int i) {val = (float)i;} + + operator float() const {return val;} + operator int() const {return (int)val;} + + void operator += (const Scalar& s) {val += s.val;} + void operator -= (const Scalar& s) {val -= s.val;} + void operator *= (const Scalar& s) {val *= s.val;} + void operator /= (const Scalar& s) {val /= s.val;} + + friend Scalar operator + (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val + s2.val);} + friend Scalar operator - (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val - s2.val);} + friend Scalar operator * (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val * s2.val);} + friend Scalar operator / (const Scalar& s1, const Scalar& s2) {return Scalar(s1.val / s2.val);} + + friend Scalar operator + (const Scalar& s, int i) {return Scalar(s.val + i);} + friend Scalar operator - (const Scalar& s, int i) {return Scalar(s.val - i);} + friend Scalar operator * (const Scalar& s, int i) {return Scalar(s.val * i);} + friend Scalar operator / 
(const Scalar& s, int i) {return Scalar(s.val / i);} + + friend Scalar operator << (const Scalar& s, int i) {return Scalar(s.val * (1<> (const Scalar& s, int i) {return Scalar(s.val / (1< (const Scalar& s1, const Scalar& s2) {return s1.val > s2.val;} + }; + + __declspec(align(16)) class __declspec(novtable) Vector + { + public: + union + { + union {struct {Scalar x, y, z, q;}; struct {Scalar r, g, b, a;};}; + union {struct {Scalar v[4];}; struct {Scalar c[4];};}; +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + union {__m128 xyzq; __m128 rgba;}; +#endif + }; + + Vector() {} + Vector(const Vector& v) {*this = v;} + Vector(Scalar s) {*this = s;} + Vector(Scalar s0, Scalar s1, Scalar s2, Scalar s3) {x = s0; y = s1; z = s2; q = s3;} + explicit Vector(DWORD dw) {*this = dw;} +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + Vector(__m128 f0123) {*this = f0123;} +#endif + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + + void operator = (const Vector& v) {xyzq = v.xyzq;} + void operator = (Scalar s) {xyzq = _mm_set1_ps(s);} + + void operator = (__m128 f0123) {xyzq = f0123;} + operator __m128() const {return xyzq;} + + void operator = (DWORD dw) {__m128i zero = _mm_setzero_si128(); xyzq = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dw), zero), zero));} + operator DWORD() const {__m128i r0 = _mm_cvttps_epi32(xyzq); r0 = _mm_packs_epi32(r0, r0); r0 = _mm_packus_epi16(r0, r0); return (DWORD)_mm_cvtsi128_si32(r0);} + operator UINT64() const {__m128i r0 = _mm_cvttps_epi32(xyzq); r0 = _mm_packs_epi32(r0, r0); return *(UINT64*)&r0;} + + void sat() {xyzq = _mm_min_ps(_mm_max_ps(xyzq, _mm_setzero_ps()), _mm_set1_ps(255));} + void rcp() {xyzq = _mm_rcp_ps(xyzq);} + + Vector floor() + { + const __m128i _80000000 = _mm_set1_epi32(0x80000000); + const __m128i _4b000000 = _mm_set1_epi32(0x4b000000); + const __m128i _3f800000 = _mm_set1_epi32(0x3f800000); + + __m128 sign = _mm_and_ps(xyzq, *(__m128*)&_80000000); + __m128 r0 = _mm_or_ps(sign, *(__m128*)&_4b000000); + __m128 
r1 = _mm_sub_ps(_mm_add_ps(xyzq, r0), r0); + __m128 r2 = _mm_sub_ps(r1, xyzq); + __m128 r3 = _mm_and_ps(_mm_cmpnle_ps(r2, sign), *(__m128*)&_3f800000); + __m128 r4 = _mm_sub_ps(r1, r3); + return r4; + } + + void operator += (const Vector& v) {xyzq = _mm_add_ps(xyzq, v);} + void operator -= (const Vector& v) {xyzq = _mm_sub_ps(xyzq, v);} + void operator *= (const Vector& v) {xyzq = _mm_mul_ps(xyzq, v);} + void operator /= (const Vector& v) {xyzq = _mm_div_ps(xyzq, v);} + +#else + + void operator = (const Vector& v) {x = v.x; y = v.y; z = v.z; q = v.q;} + void operator = (Scalar s) {x = y = z = q = s;} + + void operator = (DWORD dw) + { + x = Scalar((int)((dw>>0)&0xff)); + y = Scalar((int)((dw>>8)&0xff)); + z = Scalar((int)((dw>>16)&0xff)); + q = Scalar((int)((dw>>24)&0xff)); + } + + operator DWORD() const + { + return (DWORD)( + (((DWORD)(int)x&0xff)<<0) | + (((DWORD)(int)y&0xff)<<8) | + (((DWORD)(int)z&0xff)<<16) | + (((DWORD)(int)q&0xff)<<24)); + } + + operator UINT64() const + { + return (DWORD)( + (((UINT64)(int)x&0xffff)<<0) | + (((UINT64)(int)y&0xffff)<<16) | + (((UINT64)(int)z&0xffff)<<32) | + (((UINT64)(int)q&0xffff)<<48)); + } + + void sat() {x.sat(); y.sat(); z.sat(); q.sat();} + void rcp() {x.rcp(); y.rcp(); z.rcp(); q.rcp();} + + Vector floor() {return Vector(x.floor_s(), y.floor_s(), z.floor_s(), q.floor_s());} + + void operator += (const Vector& v) {*this = *this + v;} + void operator -= (const Vector& v) {*this = *this - v;} + void operator *= (const Vector& v) {*this = *this * v;} + void operator /= (const Vector& v) {*this = *this / v;} + +#endif + + friend Vector operator + (const Vector& v1, const Vector& v2); + friend Vector operator - (const Vector& v1, const Vector& v2); + friend Vector operator * (const Vector& v1, const Vector& v2); + friend Vector operator / (const Vector& v1, const Vector& v2); + + friend Vector operator + (const Vector& v, Scalar s); + friend Vector operator - (const Vector& v, Scalar s); + friend Vector operator * (const 
Vector& v, Scalar s); + friend Vector operator / (const Vector& v, Scalar s); + }; + + struct {__declspec(align(16)) Vector c, p, t;}; + struct {__declspec(align(16)) Vector sv[3];}; + struct {__declspec(align(16)) Scalar s[12];}; + + GSVertexSWFP() {} + GSVertexSWFP(const GSVertexSWFP& v) {*this = v;} + + void operator = (const GSVertexSWFP& v) {c = v.c; p = v.p; t = v.t;} + void operator += (const GSVertexSWFP& v) {c += v.c; p += v.p; t += v.t;} + + operator CPoint() const {return CPoint((int)p.x, (int)p.y);} + + __forceinline DWORD GetZ() const + { + return (int)p.z; + + ASSERT((float)p.z >= 0 && (float)p.q >= 0); +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + __m128 z = _mm_shuffle_ps(p, p, _MM_SHUFFLE(2,2,2,2)); + __m128 q = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3,3,3,3)); + // TODO: check if our floor is faster than doing ss->si->ss + int zh = _mm_cvttss_si32(z); + __m128 zhi = _mm_cvtsi32_ss(zhi, zh); + __m128 zhf = _mm_mul_ss(_mm_sub_ss(z, zhi), _mm_set_ss(65536)); + int zl = _mm_cvtss_si32(_mm_add_ss(zhf, q)); + return ((DWORD)zh << 16) + (DWORD)zl; +#else + // return ((DWORD)(int)p.z << 16) + (DWORD)(int)((p.z - p.z.floor_s())*65536 + p.q); + + int z = (int)p.z; + return ((DWORD)z << 16) + (DWORD)(((float)p.z - z)*65536 + (float)p.q); +#endif + } + + friend GSVertexSWFP operator + (const GSVertexSWFP& v1, const GSVertexSWFP& v2); + friend GSVertexSWFP operator - (const GSVertexSWFP& v1, const GSVertexSWFP& v2); + friend GSVertexSWFP operator * (const GSVertexSWFP& v, Scalar s); + friend GSVertexSWFP operator / (const GSVertexSWFP& v, Scalar s); + + static void Exchange(GSVertexSWFP* RESTRICT v1, GSVertexSWFP* RESTRICT v2) + { + Vector c = v1->c, p = v1->p, t = v1->t; + v1->c = v2->c; v1->p = v2->p; v1->t = v2->t; + v2->c = c; v2->p = p; v2->t = t; + } +}; + +#if _M_IX86_FP >= 2 || defined(_M_AMD64) + +__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_add_ps(v1, v2));} 
+__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_sub_ps(v1, v2));} +__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_mul_ps(v1, v2));} +__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_div_ps(v1, v2));} + +__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_add_ps(v, _mm_set1_ps(s)));} +__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_sub_ps(v, _mm_set1_ps(s)));} +__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_mul_ps(v, _mm_set1_ps(s)));} +__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_div_ps(v, _mm_set1_ps(s)));} + +__forceinline GSVertexSWFP::Vector operator << (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(_mm_mul_ps(v, _mm_set1_ps((float)(1 << i))));} +__forceinline GSVertexSWFP::Vector operator >> (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(_mm_mul_ps(v, _mm_set1_ps(1.0f / (1 << i))));} + +#else + +__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.q + v2.q);} +__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.q - v2.q);} +__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return 
GSVertexSWFP::Vector(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.q * v2.q);} +__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.q / v2.q);} + +__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x + s, v.y + s, v.z + s, v.q + s);} +__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x - s, v.y - s, v.z - s, v.q - s);} +__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x * s, v.y * s, v.z * s, v.q * s);} +__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x / s, v.y / s, v.z / s, v.q / s);} + +__forceinline GSVertexSWFP::Vector operator << (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(v.x << i, v.y << i, v.z << i, v.q << i);} +__forceinline GSVertexSWFP::Vector operator >> (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(v.x >> i, v.y >> i, v.z >> i, v.q >> i);} + +#endif + +__forceinline GSVertexSWFP operator + (const GSVertexSWFP& v1, const GSVertexSWFP& v2) +{ + GSVertexSWFP v0; + v0.c = v1.c + v2.c; + v0.p = v1.p + v2.p; + v0.t = v1.t + v2.t; + return v0; +} + +__forceinline GSVertexSWFP operator - (const GSVertexSWFP& v1, const GSVertexSWFP& v2) +{ + GSVertexSWFP v0; + v0.c = v1.c - v2.c; + v0.p = v1.p - v2.p; + v0.t = v1.t - v2.t; + return v0; +} + +__forceinline GSVertexSWFP operator * (const GSVertexSWFP& v, GSVertexSWFP::Scalar s) +{ + GSVertexSWFP v0; + GSVertexSWFP::Vector vs(s); + v0.c = v.c * vs; + v0.p = v.p * vs; + v0.t = v.t * vs; + return v0; +} + +__forceinline GSVertexSWFP operator / (const GSVertexSWFP& v, GSVertexSWFP::Scalar s) +{ + GSVertexSWFP v0; + 
GSVertexSWFP::Vector vs(s); + v0.c = v.c / vs; + v0.p = v.p / vs; + v0.t = v.t / vs; + return v0; +} + +// #include "GSVertexSWFX.h" + diff --git a/gsdx10/GSdx10.cpp b/gsdx10/GSdx10.cpp new file mode 100644 index 0000000..e6ab6c4 --- /dev/null +++ b/gsdx10/GSdx10.cpp @@ -0,0 +1,326 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSdx10.h" +#include "GSRendererHW.h" +#include "GSRendererSW.h" +#include "GSRendererNull.h" +#include "GSSettingsDlg.h" + +#ifdef _DEBUG +#define new DEBUG_NEW +#endif + +// +// Note! +// +// If this DLL is dynamically linked against the MFC +// DLLs, any functions exported from this DLL which +// call into MFC must have the AFX_MANAGE_STATE macro +// added at the very beginning of the function. +// +// For example: +// +// extern "C" BOOL PASCAL EXPORT ExportedFunction() +// { +// AFX_MANAGE_STATE(AfxGetStaticModuleState()); +// // normal function body here +// } +// +// It is very important that this macro appear in each +// function, prior to any calls into MFC. 
This means that +// it must appear as the first statement within the +// function, even before any object variable declarations +// as their constructors may generate calls into the MFC +// DLL. +// +// Please see MFC Technical Notes 33 and 58 for additional +// details. +// + +BEGIN_MESSAGE_MAP(GSdx10App, CWinApp) +END_MESSAGE_MAP() + +GSdx10App::GSdx10App() +{ +} + +GSdx10App theApp; + +BOOL GSdx10App::InitInstance() +{ + __super::InitInstance(); + + SetRegistryKey(_T("Gabest")); + + return TRUE; +} + +// + +#define PS2E_LT_GS 0x01 +#define PS2E_GS_VERSION 0x0006 +#define PS2E_X86 0x01 // 32 bit +#define PS2E_X86_64 0x02 // 64 bit + +EXPORT_C_(UINT32) PS2EgetLibType() +{ + return PS2E_LT_GS; +} + +EXPORT_C_(char*) PS2EgetLibName() +{ + CString str = _T("GSdx10"); + +#if _M_AMD64 + str += _T(" 64-bit"); +#endif + + CAtlList sl; + +#ifdef __INTEL_COMPILER + CString s; + s.Format(_T("Intel C++ %d.%02d"), __INTEL_COMPILER/100, __INTEL_COMPILER%100); + sl.AddTail(s); +#elif _MSC_VER + CString s; + s.Format(_T("MSVC %d.%02d"), _MSC_VER/100, _MSC_VER%100); + sl.AddTail(s); +#endif + +#if _M_IX86_FP >= 2 + sl.AddTail(_T("SSE2")); +#elif _M_IX86_FP >= 1 + sl.AddTail(_T("SSE")); +#endif + + POSITION pos = sl.GetHeadPosition(); + + while(pos) + { + if(pos == sl.GetHeadPosition()) str += _T(" ("); + str += sl.GetNext(pos); + str += pos ? 
_T(", ") : _T(")"); + } + + static char buff[256]; + strncpy(buff, CStringA(str), min(countof(buff)-1, str.GetLength())); + return buff; +} + +EXPORT_C_(UINT32) PS2EgetLibVersion2(UINT32 type) +{ + const UINT32 revision = 0; + const UINT32 build = 1; + const UINT32 minor = 0; + + return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (minor << 24); +} + +EXPORT_C_(UINT32) PS2EgetCpuPlatform() +{ +#if _M_AMD64 + return PS2E_X86_64; +#else + return PS2E_X86; +#endif +} + +////////////////// + +static HRESULT s_hr = E_FAIL; +static GSRenderer* s_gs; +static void (*s_irq)() = NULL; +static BYTE* s_basemem = NULL; + +EXPORT_C GSsetBaseMem(BYTE* mem) +{ + s_basemem = mem - 0x12000000; +} + +EXPORT_C_(INT32) GSinit() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + return 0; +} + +EXPORT_C GSshutdown() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); +} + +EXPORT_C GSclose() +{ + delete s_gs; + + s_gs = NULL; + + if(SUCCEEDED(s_hr)) + { + ::CoUninitialize(); + + s_hr = E_FAIL; + } +} + +EXPORT_C_(INT32) GSopen(void* dsp, char* title, int mt) +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + GSclose(); + + bool nloophack = AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2) == 1; + + switch(AfxGetApp()->GetProfileInt(_T("Settings"), _T("renderer"), 0)) + { + case 0: s_gs = new GSRendererHW(s_basemem, !!mt, s_irq, nloophack); break; + case 1: s_gs = new GSRendererSWFP(s_basemem, !!mt, s_irq, nloophack); break; + case 2: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack); break; + default: return -1; + } + + s_hr = ::CoInitialize(0); + + if(!s_gs->Create(CString(title))) + { + GSclose(); + return -1; + } + + s_gs->Show(); + + *(HWND*)dsp = *s_gs; + + return 0; +} + +EXPORT_C GSreset() +{ + s_gs->Reset(); +} + +EXPORT_C GSwriteCSR(UINT32 csr) +{ + s_gs->WriteCSR(csr); +} + +EXPORT_C GSreadFIFO(BYTE* mem) +{ + s_gs->ReadFIFO(mem, 1); +} + +EXPORT_C GSreadFIFO2(BYTE* mem, UINT32 size) +{ + s_gs->ReadFIFO(mem, size); +} + +EXPORT_C 
GSgifTransfer1(BYTE* mem, UINT32 addr) +{ + s_gs->Transfer(mem + addr, (0x4000 - addr) / 16, 0); +} + +EXPORT_C GSgifTransfer2(BYTE* mem, UINT32 size) +{ + s_gs->Transfer(mem, size, 1); +} + +EXPORT_C GSgifTransfer3(BYTE* mem, UINT32 size) +{ + s_gs->Transfer(mem, size, 2); +} + +EXPORT_C GSvsync(int field) +{ + s_gs->VSync(field); +} + +EXPORT_C_(UINT32) GSmakeSnapshot(char* path) +{ + return s_gs->MakeSnapshot(path); +} + +EXPORT_C GSkeyEvent(keyEvent* ev) +{ +} + +EXPORT_C_(INT32) GSfreeze(int mode, freezeData* data) +{ + if(mode == FREEZE_SAVE) + { + return s_gs->Freeze(data, false); + } + else if(mode == FREEZE_SIZE) + { + return s_gs->Freeze(data, true); + } + else if(mode == FREEZE_LOAD) + { + return s_gs->Defrost(data); + } + + return 0; +} + +EXPORT_C GSconfigure() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + if(IDOK == GSSettingsDlg().DoModal()) + { + GSshutdown(); + GSinit(); + } +} + +EXPORT_C_(INT32) GStest() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + CComPtr dev; + + return SUCCEEDED(D3D10CreateDevice(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &dev)) ? 0 : -1; +} + +EXPORT_C GSabout() +{ +} + +EXPORT_C GSirqCallback(void (*irq)()) +{ + s_irq = irq; +} + +EXPORT_C GSsetGameCRC(int crc, int options) +{ + s_gs->SetGameCRC(crc, options); +} + +EXPORT_C GSgetLastTag(UINT32* tag) +{ + s_gs->GetLastTag(tag); +} + +EXPORT_C GSsetFrameSkip(int frameskip) +{ + s_gs->SetFrameSkip(frameskip); +} diff --git a/gsdx10/GSdx10.def b/gsdx10/GSdx10.def new file mode 100644 index 0000000..693232b --- /dev/null +++ b/gsdx10/GSdx10.def @@ -0,0 +1,33 @@ +; GSdx10.def : Declares the module parameters for the DLL. 
+ +LIBRARY "GSdx10" + +EXPORTS + ; Explicit exports can go here + PS2EgetLibType + PS2EgetLibName + PS2EgetLibVersion2 + PS2EgetCpuPlatform + GSsetBaseMem + GSinit + GSshutdown + GSopen + GSclose + GSreset + GSwriteCSR + GSgifTransfer1 + GSgifTransfer2 + GSgifTransfer3 + GSvsync + GSmakeSnapshot + GSkeyEvent + GSfreeze + GSconfigure + GStest + GSabout + GSreadFIFO + GSreadFIFO2 + GSirqCallback + GSsetGameCRC + GSsetFrameSkip + ; GSReplay \ No newline at end of file diff --git a/gsdx10/GSdx10.h b/gsdx10/GSdx10.h new file mode 100644 index 0000000..8424900 --- /dev/null +++ b/gsdx10/GSdx10.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#ifndef __AFXWIN_H__ + #error include 'stdafx.h' before including this file for PCH +#endif + +class GSdx10App : public CWinApp +{ +public: + GSdx10App(); + +public: + virtual BOOL InitInstance(); + + DECLARE_MESSAGE_MAP() +}; diff --git a/gsdx10/GSdx10.rc b/gsdx10/GSdx10.rc new file mode 100644 index 0000000..e1b800a --- /dev/null +++ b/gsdx10/GSdx10.rc @@ -0,0 +1,197 @@ +// Microsoft Visual C++ generated resource script. 
+// +#include "resource.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "afxres.h" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// Hungarian resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_HUN) +#ifdef _WIN32 +LANGUAGE LANG_HUNGARIAN, SUBLANG_DEFAULT +#pragma code_page(1250) +#endif //_WIN32 + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include ""afxres.h""\r\n" + "\0" +END + +3 TEXTINCLUDE +BEGIN + "#define _AFX_NO_SPLITTER_RESOURCES\r\n" + "#define _AFX_NO_OLE_RESOURCES\r\n" + "#define _AFX_NO_TRACKER_RESOURCES\r\n" + "#define _AFX_NO_PROPERTY_RESOURCES\r\n" + "\r\n" + "#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)\r\n" + "LANGUAGE 9, 1\r\n" + "#pragma code_page(1252)\r\n" + "#include ""res\\GSdx10.rc2"" // non-Microsoft Visual C++ edited resources\r\n" + "#include ""afxres.rc"" // Standard components\r\n" + "#endif\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + +#endif // Hungarian resources +///////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) 
resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + +///////////////////////////////////////////////////////////////////////////// +// +// Bitmap +// + +IDB_LOGO1 BITMAP "res\\logo1.bmp" + +///////////////////////////////////////////////////////////////////////////// +// +// Dialog +// + +IDD_CONFIG DIALOGEX 0, 0, 189, 204 +STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "Settings..." +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + CONTROL 2023,IDC_STATIC,"Static",SS_BITMAP,7,7,175,44,WS_EX_CLIENTEDGE + LTEXT "Resolution:",IDC_STATIC,7,59,37,8 + COMBOBOX IDC_COMBO3,71,57,111,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + LTEXT "Renderer:",IDC_STATIC,7,74,34,8 + COMBOBOX IDC_COMBO1,71,72,111,69,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + LTEXT "Interlacing (F5):",IDC_STATIC,7,90,53,8 + COMBOBOX IDC_COMBO2,71,87,111,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + LTEXT "Aspect Ratio (F6):",IDC_STATIC,7,105,60,8 + COMBOBOX IDC_COMBO5,71,102,111,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + LTEXT "D3D internal res:",IDC_STATIC,7,120,55,8 + EDITTEXT IDC_EDIT1,71,117,35,13,ES_AUTOHSCROLL | ES_NUMBER + CONTROL "",IDC_SPIN1,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,132,11,14 + EDITTEXT IDC_EDIT2,109,117,35,13,ES_AUTOHSCROLL | ES_NUMBER + CONTROL "",IDC_SPIN2,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,134,132,11,14 + CONTROL "Native",IDC_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,149,119,33,10 + CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,139,67,10 + CONTROL "Enable NLOOP hack (third state => auto)",IDC_CHECK6, + "Button",BS_AUTO3STATE | WS_TABSTOP,7,151,149,10 + DEFPUSHBUTTON "OK",IDOK,42,183,50,14 + PUSHBUTTON "Cancel",IDCANCEL,96,183,50,14 + CONTROL "Wait 
vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_DISABLED | WS_TABSTOP,7,163,51,10 +END + + +///////////////////////////////////////////////////////////////////////////// +// +// DESIGNINFO +// + +#ifdef APSTUDIO_INVOKED +GUIDELINES DESIGNINFO +BEGIN + IDD_CONFIG, DIALOG + BEGIN + LEFTMARGIN, 7 + RIGHTMARGIN, 182 + VERTGUIDE, 71 + VERTGUIDE, 182 + TOPMARGIN, 7 + BOTTOMMARGIN, 197 + END +END +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 1,0,0,0 + PRODUCTVERSION 1,0,0,0 + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x4L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904e4" + BEGIN + VALUE "Comments", "http://gabest.org/" + VALUE "CompanyName", "Gabest" + VALUE "FileDescription", "GS plugin for ps2 emulators" + VALUE "FileVersion", "1, 0, 0, 0" + VALUE "InternalName", "GSdx10.dll" + VALUE "LegalCopyright", "Copyright (c) 2007 Gabest. All rights reserved." + VALUE "OriginalFilename", "GSdx10.dll" + VALUE "ProductName", "GSdx10" + VALUE "ProductVersion", "1, 0, 0, 0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1252 + END +END + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. 
+// +#define _AFX_NO_SPLITTER_RESOURCES +#define _AFX_NO_OLE_RESOURCES +#define _AFX_NO_TRACKER_RESOURCES +#define _AFX_NO_PROPERTY_RESOURCES + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +LANGUAGE 9, 1 +#pragma code_page(1252) +#include "res\GSdx10.rc2" // non-Microsoft Visual C++ edited resources +#include "afxres.rc" // Standard components +#endif + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/gsdx10/GSdx10_vs2005.vcproj b/gsdx10/GSdx10_vs2005.vcproj new file mode 100644 index 0000000..7517957 --- /dev/null +++ b/gsdx10/GSdx10_vs2005.vcprojdiff --git a/gsdx10/GSdx10_vs2008.vcproj b/gsdx10/GSdx10_vs2008.vcproj new file mode 100644 index 0000000..54eda1b --- /dev/null +++ b/gsdx10/GSdx10_vs2008.vcproj @@ -0,0 +1,835 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gsdx10/Resource.h b/gsdx10/Resource.h new file mode 100644 index 0000000..8a4247f --- /dev/null +++ b/gsdx10/Resource.h @@ -0,0 +1,39 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. 
+// Used by GSdx10.rc +// +#define IDD_CONFIG 2001 +#define IDC_CHECK1 2002 +#define IDC_CHECK2 2003 +#define IDC_CHECK3 2004 +#define IDC_CHECK5 2005 +#define IDC_CHECK6 2006 +#define IDC_COMBO1 2007 +#define IDC_COMBO3 2008 +#define IDC_COMBO4 2009 +#define IDC_EDIT1 2010 +#define IDC_EDIT2 2011 +#define IDC_CUSTOM1 2014 +#define IDC_CHECK4 2015 +#define IDC_COMBO2 2016 +#define IDC_COMBO5 2017 +#define IDC_RADIO1 2018 +#define IDC_SPIN1 2020 +#define IDC_SPIN2 2021 +#define IDB_BITMAP1 2022 +#define IDB_LOGO1 2023 +#define IDR_CONVERT_FX 10000 +#define IDR_TFX_FX 10001 +#define IDR_MERGE_FX 10002 +#define IDR_INTERLACE_FX 10003 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 10004 +#define _APS_NEXT_COMMAND_VALUE 32771 +#define _APS_NEXT_CONTROL_VALUE 2024 +#define _APS_NEXT_SYMED_VALUE 5000 +#endif +#endif diff --git a/gsdx10/res/GSdx10.rc2 b/gsdx10/res/GSdx10.rc2 new file mode 100644 index 0000000..0c79a14 --- /dev/null +++ b/gsdx10/res/GSdx10.rc2 @@ -0,0 +1,18 @@ +// +// GSdx10.RC2 - resources Microsoft Visual C++ does not edit directly +// + +#ifdef APSTUDIO_INVOKED +#error this file is not editable by Microsoft Visual C++ +#endif //APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// Add manually edited resources here... 
+ +IDR_CONVERT_FX RCDATA "res\\convert.fx" +IDR_TFX_FX RCDATA "res\\tfx.fx" +IDR_MERGE_FX RCDATA "res\\merge.fx" +IDR_INTERLACE_FX RCDATA "res\\interlace.fx" + +///////////////////////////////////////////////////////////////////////////// diff --git a/gsdx10/res/convert.fx b/gsdx10/res/convert.fx new file mode 100644 index 0000000..8c41a33 --- /dev/null +++ b/gsdx10/res/convert.fx @@ -0,0 +1,60 @@ +struct VS_INPUT +{ + float4 p : POSITION; + float2 t : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float4 p : SV_Position; + float2 t : TEXCOORD0; +}; + +VS_OUTPUT vs_main(VS_INPUT input) +{ + VS_OUTPUT output; + + output.p = input.p; + output.t = input.t; + + return output; +} + +Texture2D Texture; +SamplerState Sampler; + +struct PS_INPUT +{ + float4 p : SV_Position; + float2 t : TEXCOORD0; +}; + +float4 ps_main0(PS_INPUT input) : SV_Target0 +{ + return Texture.Sample(Sampler, input.t); +} + +uint ps_main1(PS_INPUT input) : SV_Target0 +{ + float4 f = Texture.Sample(Sampler, input.t); + + f.a *= 256.0f/127; // hm, 0.5 won't give us 1.0 if we just multiply with 2 + + uint4 i = f * float4(0x001f, 0x03e0, 0x7c00, 0x8000); + + return (i.x & 0x001f) | (i.y & 0x03e0) | (i.z & 0x7c00) | (i.w & 0x8000); +} + +float4 ps_main2(PS_INPUT input) : SV_Target0 +{ + clip(Texture.Sample(Sampler, input.t).a - (0.5 - 0.9f/256)); + + return 0; +} + +float4 ps_main3(PS_INPUT input) : SV_Target0 +{ + clip((0.5 - 0.9f/256) - Texture.Sample(Sampler, input.t).a); + + return 0; +} diff --git a/gsdx10/res/interlace.fx b/gsdx10/res/interlace.fx new file mode 100644 index 0000000..9eed8ed --- /dev/null +++ b/gsdx10/res/interlace.fx @@ -0,0 +1,43 @@ + +Texture2D Texture; +SamplerState Sampler; + +cbuffer cb0 +{ + float2 ZrH; + float hH; +}; + +struct PS_INPUT +{ + float4 p : SV_Position; + float2 t : TEXCOORD0; +}; + +float4 ps_main0(PS_INPUT input) : SV_Target0 +{ + clip(frac(input.t.y * hH) - 0.5); + + return Texture.Sample(Sampler, input.t); +} + +float4 ps_main1(PS_INPUT input) : SV_Target0 +{ + 
clip(0.5 - frac(input.t.y * hH)); + + return Texture.Sample(Sampler, input.t); +} + +float4 ps_main2(PS_INPUT input) : SV_Target0 +{ + float4 c0 = Texture.Sample(Sampler, input.t - ZrH); + float4 c1 = Texture.Sample(Sampler, input.t); + float4 c2 = Texture.Sample(Sampler, input.t + ZrH); + + return (c0 + c1 * 2 + c2) / 4; +} + +float4 ps_main3(PS_INPUT input) : SV_Target0 +{ + return Texture.Sample(Sampler, input.t); +} diff --git a/gsdx10/res/logo1.bmp b/gsdx10/res/logo1.bmp new file mode 100644 index 0000000..25f6b17 Binary files /dev/null and b/gsdx10/res/logo1.bmp differ diff --git a/gsdx10/res/merge.fx b/gsdx10/res/merge.fx new file mode 100644 index 0000000..aa72c3c --- /dev/null +++ b/gsdx10/res/merge.fx @@ -0,0 +1,53 @@ +struct VS_INPUT +{ + float4 p : POSITION; + float2 t0 : TEXCOORD0; + float2 t1 : TEXCOORD1; +}; + +struct VS_OUTPUT +{ + float4 p : SV_Position; + float2 t0 : TEXCOORD0; + float2 t1 : TEXCOORD1; +}; + +VS_OUTPUT vs_main(VS_INPUT input) +{ + VS_OUTPUT output; + + output.p = input.p; + output.t0 = input.t0; + output.t1 = input.t1; + + return output; +} + +Texture2D RA01; +Texture2D RA02; +SamplerState Sampler; + +cbuffer cb1 +{ + float4 BGColor; + float Alpha; + float EN1; + float EN2; + int MMOD; + int SLBG; +}; + +struct PS_INPUT +{ + float4 p : SV_Position; + float2 t0 : TEXCOORD0; + float2 t1 : TEXCOORD1; +}; + +float4 ps_main(PS_INPUT input) : SV_Target0 +{ + float4 c0 = EN1 * RA01.Sample(Sampler, input.t0); + float4 c1 = SLBG ? BGColor : EN2 * RA02.Sample(Sampler, input.t1); + float a = EN1 * (MMOD ? 
Alpha : min(c0.a * 2, 1)); + return lerp(c1, c0, a); +} diff --git a/gsdx10/res/tfx.fx b/gsdx10/res/tfx.fx new file mode 100644 index 0000000..c889033 --- /dev/null +++ b/gsdx10/res/tfx.fx @@ -0,0 +1,343 @@ +cbuffer cb0 +{ + float4 VertexScale; + float4 VertexOffset; + float2 TextureScale; +}; + +struct VS_INPUT +{ + float4 p : POSITION; + float4 c : COLOR0; + float4 f : COLOR1; + float2 t : TEXCOORD0; +}; + +struct VS_OUTPUT +{ + float4 p : SV_Position; + float4 c : COLOR0; + float4 t : TEXCOORD0; +}; + +VS_OUTPUT vs_main(VS_INPUT input) +{ + VS_OUTPUT output; + + output.p = input.p * VertexScale - VertexOffset; + + output.c = input.c; + + output.t.xy = input.t.xy * TextureScale; + output.t.z = input.f.a; + output.t.w = input.p.w < 0 ? 1 : input.p.w; // FIXME: <= takes small but not 0 numbers as 0 + + return output; +} + +#ifndef IIP +#define IIP 0 +#define PRIM 3 +#endif + +#if PRIM == 0 + +[maxvertexcount(1)] +void gs_main(point VS_OUTPUT input[1], inout PointStream stream) +{ + stream.Append(input[0]); +} + +#elif PRIM == 1 + +[maxvertexcount(2)] +void gs_main(line VS_OUTPUT input[2], inout LineStream stream) +{ + #if IIP == 0 + input[0].c = input[1].c; + input[0].t.z = input[1].t.z; + #endif + + stream.Append(input[0]); + stream.Append(input[1]); +} + +#elif PRIM == 2 + +[maxvertexcount(3)] +void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream stream) +{ + #if IIP == 0 + input[0].c = input[2].c; + input[0].t.z = input[2].t.z; + input[1].c = input[2].c; + input[1].t.z = input[2].t.z; + #endif + + stream.Append(input[0]); + stream.Append(input[1]); + stream.Append(input[2]); +} + +#elif PRIM == 3 + +[maxvertexcount(4)] +void gs_main(line VS_OUTPUT input[2], inout TriangleStream stream) +{ + input[0].p.z = input[1].p.z; + input[0].t.zw = input[1].t.zw; + + VS_OUTPUT lb = input[1]; + + lb.p.x = input[0].p.x; + lb.t.x = input[0].t.x; + + VS_OUTPUT rt = input[1]; + + rt.p.y = input[0].p.y; + rt.t.y = input[0].t.y; + + stream.Append(input[0]); + 
stream.Append(lb); + stream.Append(rt); + stream.Append(input[1]); +} + +#endif + +Texture2D Texture; +Texture2D Palette; +SamplerState Sampler; + +cbuffer cb1 +{ + float4 FogColor; + float2 ClampMin; + float2 ClampMax; + float TA0; + float TA1; + float AREF; + float _pad; + float2 WH; + float2 rWrH; + float2 rWZ; + float2 ZrH; +}; + +struct PS_INPUT +{ + float4 p : SV_Position; + float4 c : COLOR0; + float4 t : TEXCOORD0; +}; + +struct PS_OUTPUT +{ + float4 c0 : SV_Target0; + float4 c1 : SV_Target1; +}; + +#ifndef FST +#define FST 0 +#define CLAMP 0 +#define BPP 0 +#define AEM 0 +#define TFX 0 +#define TCC 1 +#define ATE 1 +#define ATST 2 +#define FOG 0 +#define CLR1 0 +#define FBA 0 +#define AOUT 0 +#endif + +float4 Normalize16(float4 f) +{ + return f / float4(0x001f, 0x03e0, 0x7c00, 0x8000); +} + +float4 Extract16(uint i) +{ + float4 f; + + f.r = i & 0x001f; + f.g = i & 0x03e0; + f.b = i & 0x7c00; + f.a = i & 0x8000; + + return f; +} + +PS_OUTPUT ps_main(PS_INPUT input) +{ + float2 tc = input.t.xy; + + if(FST == 0) + { + tc /= input.t.w; + } + + if(CLAMP == 1) + { + tc = clamp(tc, ClampMin, ClampMax); + } + + // TODO: region repeat (PITA, would loose automatic bilinear then) + + float4 t; + + if(BPP == 0) // 32 + { + t = Texture.Sample(Sampler, tc); + } + else if(BPP == 1) // 24 + { + t = Texture.Sample(Sampler, tc); + + t.a = AEM == 0 || any(t.rgb) ? TA0 : 0; + } + else if(BPP == 2) // 16 + { + t = Texture.Sample(Sampler, tc); + + t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated + } + else if(BPP == 3) // 16P + { + // tc -= 0.5 * rWrH; // ? 
+ + uint4 i = float4( + Texture.Sample(Sampler, tc).r, + Texture.Sample(Sampler, tc + rWZ).r, + Texture.Sample(Sampler, tc + ZrH).r, + Texture.Sample(Sampler, tc + rWrH).r) * 65535; + + float4 t00 = Extract16(i.x); + float4 t01 = Extract16(i.y); + float4 t10 = Extract16(i.z); + float4 t11 = Extract16(i.w); + + float2 dd = frac(tc * WH); + + t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); + + t = Normalize16(t); + + t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated + } + else if(BPP == 4) // 8HP / 32-bit palette + { + // tc -= 0.5 * rWrH; // ? + + float4 f = float4( + Texture.Sample(Sampler, tc).a, + Texture.Sample(Sampler, tc + rWZ).a, + Texture.Sample(Sampler, tc + ZrH).a, + Texture.Sample(Sampler, tc + rWrH).a); + + float4 t00 = Palette.Sample(Sampler, f.x); + float4 t01 = Palette.Sample(Sampler, f.y); + float4 t10 = Palette.Sample(Sampler, f.z); + float4 t11 = Palette.Sample(Sampler, f.w); + + float2 dd = frac(tc * WH); + + t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); + } + else if(BPP == 5) // 8HP / 16-bit palette + { + // TODO: yuck, just pre-convert the palette to 32-bit + } + + float4 c = input.c; + + if(TFX == 0) + { + if(TCC == 0) + { + c.rgb = c.rgb * t.rgb * 2; + } + else + { + c = c * t * 2; + } + } + else if(TFX == 1) + { + c = t; + } + else if(TFX == 2) + { + c.rgb = c.rgb * t.rgb * 2 + c.a; + + if(TCC == 1) + { + c.a += t.a; + } + } + else if(TFX == 3) + { + c.rgb = c.rgb * t.rgb * 2 + c.a; + + if(TCC == 1) + { + c.a = t.a; + } + } + + c = saturate(c); + + // TODO: alpha test hurts a lot + + if(ATE == 1) + { + if(ATST == 0) + { + discard; + } + else if(ATST == 2 || ATST == 3) // l, le + { + clip(AREF - c.a); + } + else if(ATST == 4) // e + { + clip(0.9f/256 - abs(c.a - AREF)); + } + else if(ATST == 5 || ATST == 6) // ge, g + { + clip(c.a - AREF); + } + else if(ATST == 7) // ne + { + clip(abs(c.a - AREF) - 0.9f/256); + } + } + + if(FOG == 
1) + { + c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z); + } + + if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes + { + c.rgb = 1; + } + + PS_OUTPUT output; + + output.c1 = c.a * 2; // used for alpha blending + + if(AOUT == 1) // 16 bit output + { + c.a = FBA == 1 ? 0.5 : step(0.5, c.a) * 0.5; + } + else if(FBA == 1) + { + if(c.a < 0.5) c.a += 0.5; + } + + output.c0 = c; + + return output; +} diff --git a/gsdx10/stdafx.cpp b/gsdx10/stdafx.cpp new file mode 100644 index 0000000..aa3b996 --- /dev/null +++ b/gsdx10/stdafx.cpp @@ -0,0 +1,7 @@ +// stdafx.cpp : source file that includes just the standard includes +// GSdx10.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + + diff --git a/gsdx10/stdafx.h b/gsdx10/stdafx.h new file mode 100644 index 0000000..d7c9ad9 --- /dev/null +++ b/gsdx10/stdafx.h @@ -0,0 +1,64 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently + +#pragma once + +#pragma warning(disable: 4996) + +#ifndef VC_EXTRALEAN +#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers +#endif + +// Modify the following defines if you have to target a platform prior to the ones specified below. +// Refer to MSDN for the latest info on corresponding values for different platforms. +#ifndef WINVER // Allow use of features specific to Windows 95 and Windows NT 4 or later. +#define WINVER 0x0510 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later. +#endif + +#ifndef _WIN32_WINNT // Allow use of features specific to Windows NT 4 or later. +#define _WIN32_WINNT 0x0400 // Change this to the appropriate value to target Windows 2000 or later. +#endif + +#ifndef _WIN32_WINDOWS // Allow use of features specific to Windows 98 or later. +#define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later. 
+#endif + +#ifndef _WIN32_IE // Allow use of features specific to IE 4.0 or later. +#define _WIN32_IE 0x0400 // Change this to the appropriate value to target IE 5.0 or later. +#endif + +#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be explicit + +#include // MFC core and standard components +#include // MFC extensions + +#ifndef _AFX_NO_AFXCMN_SUPPORT +#include // MFC support for Windows Common Controls +#endif // _AFX_NO_AFXCMN_SUPPORT + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../GSdx/GSState.h" + +#define countof(a) (sizeof(a)/sizeof(a[0])) + +#ifndef RESTRICT + #ifdef __INTEL_COMPILER + #define RESTRICT restrict + #elif _MSC_VER >= 1400 + #define RESTRICT __restrict + #else + #define RESTRICT + #endif +#endif + +#define EXPORT_C extern "C" __declspec(dllexport) void __stdcall +#define EXPORT_C_(type) extern "C" __declspec(dllexport) type __stdcall \ No newline at end of file diff --git a/release.vsprops b/release.vsprops new file mode 100644 index 0000000..5cc4030 --- /dev/null +++ b/release.vsprops @@ -0,0 +1,25 @@ + + + + + diff --git a/sse2.vsprops b/sse2.vsprops new file mode 100644 index 0000000..9ba4ff3 --- /dev/null +++ b/sse2.vsprops @@ -0,0 +1,11 @@ + + + +