From cfdca9259d3e1fc20c9883ed95c4b3240d5f6ed2 Mon Sep 17 00:00:00 2001 From: gabest Date: Mon, 24 Nov 2008 05:44:27 +0000 Subject: [PATCH] --- GSdx_vs2005.sln | 92 -- cdvd/SettingsDlg.h | 1 - gsdx/GPU.cpp | 272 ++++ gsdx/GPU.h | 274 ++++ gsdx/GPUDrawingEnvironment.h | 78 + gsdx/GPULocalMemory.cpp | 246 +++ gsdx/GPULocalMemory.h | 59 + gsdx/GPURasterizer.cpp | 1487 +++++++++++++++++++ gsdx/GPURasterizer.h | 125 ++ gsdx/GPURenderer.cpp | 23 + gsdx/GPURenderer.h | 322 ++++ gsdx/GPURendererSW.cpp | 23 + gsdx/GPURendererSW.h | 199 +++ gsdx/GPUState.cpp | 822 +++++++++++ gsdx/GPUState.h | 145 ++ gsdx/GPUTextureCacheSW.cpp | 130 ++ gsdx/GPUTextureCacheSW.h | 41 + gsdx/GPUVertex.h | 51 + gsdx/GPUVertexSW.h | 96 ++ gsdx/GS.cpp | 704 +++++++++ gsdx/GS.h | 16 +- gsdx/GSClut.cpp | 2 + gsdx/GSCrc.cpp | 4 + gsdx/GSDump.cpp | 2 +- gsdx/GSDump.h | 2 +- gsdx/GSRasterizer.cpp | 236 ++- gsdx/GSRasterizer.h | 62 +- gsdx/GSRasterizerEx.cpp | 313 ++-- gsdx/GSRenderer.h | 7 +- gsdx/GSRendererSW.h | 2 +- gsdx/GSState.cpp | 4 +- gsdx/GSState.h | 6 +- gsdx/GSUtil.cpp | 117 +- gsdx/GSUtil.h | 43 +- gsdx/GSVector.cpp | 10 + gsdx/GSVector.h | 75 + gsdx/GSVertexList.h | 4 +- gsdx/GSdx.cpp | 787 +--------- gsdx/GSdx.def | 29 +- gsdx/GSdx_vs2005.vcproj | 2711 ---------------------------------- gsdx/GSdx_vs2008.vcproj | 88 ++ 41 files changed, 5764 insertions(+), 3946 deletions(-) delete mode 100644 GSdx_vs2005.sln create mode 100644 gsdx/GPU.cpp create mode 100644 gsdx/GPU.h create mode 100644 gsdx/GPUDrawingEnvironment.h create mode 100644 gsdx/GPULocalMemory.cpp create mode 100644 gsdx/GPULocalMemory.h create mode 100644 gsdx/GPURasterizer.cpp create mode 100644 gsdx/GPURasterizer.h create mode 100644 gsdx/GPURenderer.cpp create mode 100644 gsdx/GPURenderer.h create mode 100644 gsdx/GPURendererSW.cpp create mode 100644 gsdx/GPURendererSW.h create mode 100644 gsdx/GPUState.cpp create mode 100644 gsdx/GPUState.h create mode 100644 gsdx/GPUTextureCacheSW.cpp create mode 100644 
gsdx/GPUTextureCacheSW.h create mode 100644 gsdx/GPUVertex.h create mode 100644 gsdx/GPUVertexSW.h create mode 100644 gsdx/GS.cpp delete mode 100644 gsdx/GSdx_vs2005.vcproj diff --git a/GSdx_vs2005.sln b/GSdx_vs2005.sln deleted file mode 100644 index e38bde1..0000000 --- a/GSdx_vs2005.sln +++ /dev/null @@ -1,92 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 9.00 -# Visual Studio 2005 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx", "gsdx\GSdx_vs2005.vcproj", "{18E42F6F-3A62-41EE-B42F-79366C4F1E95}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xpad", "xpad\xpad_vs2005.vcproj", "{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug SSE2|Win32 = Debug SSE2|Win32 - Debug SSE2|x64 = Debug SSE2|x64 - Debug SSE4|Win32 = Debug SSE4|Win32 - Debug SSE4|x64 = Debug SSE4|x64 - Debug SSSE3|Win32 = Debug SSSE3|Win32 - Debug SSSE3|x64 = Debug SSSE3|x64 - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - Release SSE2|Win32 = Release SSE2|Win32 - Release SSE2|x64 = Release SSE2|x64 - Release SSE4|Win32 = Release SSE4|Win32 - Release SSE4|x64 = Release SSE4|x64 - Release SSSE3|Win32 = Release SSSE3|Win32 - Release SSSE3|x64 = Release SSSE3|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.Build.0 = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE4|Win32.ActiveCfg = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE4|x64.ActiveCfg = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE4|x64.Build.0 = Debug SSE2|x64 
- {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSSE3|Win32.ActiveCfg = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSSE3|x64.ActiveCfg = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSSE3|x64.Build.0 = Debug SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.Build.0 = Debug|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.Build.0 = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|Win32.ActiveCfg = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.ActiveCfg = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE4|x64.Build.0 = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|Win32.ActiveCfg = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|x64.ActiveCfg = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSSE3|x64.Build.0 = Release SSE2|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64 - {18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.Build.0 = Release|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64 - 
{6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE2|x64.Build.0 = Debug SSE2|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|Win32.ActiveCfg = Debug SSE4|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|Win32.Build.0 = Debug SSE4|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|x64.ActiveCfg = Debug SSE4|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSE4|x64.Build.0 = Debug SSE4|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|Win32.ActiveCfg = Debug SSSE3|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|Win32.Build.0 = Debug SSSE3|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|x64.ActiveCfg = Debug SSSE3|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug SSSE3|x64.Build.0 = Debug SSSE3|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|Win32.ActiveCfg = Debug|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|Win32.Build.0 = Debug|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|x64.ActiveCfg = Debug|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Debug|x64.Build.0 = Debug|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|Win32.Build.0 = Release SSE2|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.ActiveCfg = Release SSE2|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE2|x64.Build.0 = Release SSE2|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|Win32.ActiveCfg = Release SSE4|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|Win32.Build.0 = Release SSE4|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|x64.ActiveCfg = Release SSE4|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSE4|x64.Build.0 = Release SSE4|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|Win32.ActiveCfg = Release SSSE3|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|Win32.Build.0 = Release SSSE3|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release 
SSSE3|x64.ActiveCfg = Release SSSE3|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release SSSE3|x64.Build.0 = Release SSSE3|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|Win32.ActiveCfg = Release|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|Win32.Build.0 = Release|Win32 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|x64.ActiveCfg = Release|x64 - {6F3C4136-5801-4EBC-AC6E-37DF6FAB150A}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/cdvd/SettingsDlg.h b/cdvd/SettingsDlg.h index 2ff9c87..f2b4f5a 100644 --- a/cdvd/SettingsDlg.h +++ b/cdvd/SettingsDlg.h @@ -31,6 +31,5 @@ protected: LRESULT WindowProc(UINT message, WPARAM wParam, LPARAM lParam); afx_msg void OnBrowse(); -public: afx_msg void OnBnClickedOk(); }; diff --git a/gsdx/GPU.cpp b/gsdx/GPU.cpp new file mode 100644 index 0000000..312b304 --- /dev/null +++ b/gsdx/GPU.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSUtil.h" +#include "GPURendererSW.h" +#include "GSDevice9.h" +#include "GSDevice10.h" + +#define PSE_LT_GPU 2 + +static HRESULT s_hr = E_FAIL; +static GPURendererBase* s_gpu = NULL; + +EXPORT_C_(UINT32) PSEgetLibType() +{ + return PSE_LT_GPU; +} + +EXPORT_C_(char*) PSEgetLibName() +{ + return GSUtil::GetLibName(); +} + +EXPORT_C_(UINT32) PSEgetLibVersion() +{ + static const UINT32 version = 1; + static const UINT32 revision = 1; + + return version << 16 | revision << 8 | PLUGIN_VERSION; +} + +EXPORT_C_(INT32) GPUinit() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + // TODO + + return 0; +} + +EXPORT_C_(INT32) GPUshutdown() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + // TODO + + return 0; +} + +EXPORT_C_(INT32) GPUclose() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + delete s_gpu; + + s_gpu = NULL; + + if(SUCCEEDED(s_hr)) + { + ::CoUninitialize(); + + s_hr = E_FAIL; + } + + return 0; +} + +EXPORT_C_(INT32) GPUopen(HWND hWnd) +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + if(!GSUtil::CheckDirectX() || !GSUtil::CheckSSE()) + { + return -1; + } + + GPUclose(); + + GPURendererSettings rs; + + rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); + + s_gpu = new GPURendererSW(rs); + + s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); + + if(!s_gpu->Create(hWnd)) + { + GPUclose(); + + return -1; + } + + return 0; +} + +EXPORT_C_(INT32) GPUconfigure() +{ + // TODO + + return 0; +} + +EXPORT_C_(INT32) GPUtest() +{ + // TODO + + return 0; +} + +EXPORT_C GPUabout() +{ + // TODO +} + +EXPORT_C GPUwriteDataMem(const BYTE* mem, UINT32 size) +{ + s_gpu->WriteData(mem, size); +} + +EXPORT_C GPUwriteData(UINT32 data) +{ + s_gpu->WriteData((BYTE*)&data, 1); +} + +EXPORT_C GPUreadDataMem(BYTE* mem, UINT32 size) +{ + s_gpu->ReadData(mem, size); +} + +EXPORT_C_(UINT32) GPUreadData() +{ + UINT32 data = 0; + + s_gpu->ReadData((BYTE*)&data, 1); + 
+ return data; +} + +EXPORT_C GPUwriteStatus(UINT32 status) +{ + s_gpu->WriteStatus(status); +} + +EXPORT_C_(UINT32) GPUreadStatus() +{ + return s_gpu->ReadStatus(); +} + +EXPORT_C_(UINT32) GPUdmaChain(const BYTE* mem, UINT32 addr) +{ + // TODO + + do + { + BYTE size = mem[addr + 3]; + + if(size > 0) + { + s_gpu->WriteData(&mem[addr + 4], size); + } + + addr = *(UINT32*)&mem[addr] & 0xffffff; + } + while(addr != 0xffffff); + + return 0; +} + +EXPORT_C_(UINT32) GPUgetMode() +{ + // TODO + + return 0; +} + +EXPORT_C GPUsetMode(UINT32) +{ + // TODO +} + +EXPORT_C GPUupdateLace() +{ + s_gpu->VSync(); +} + +EXPORT_C GPUmakeSnapshot() +{ + LPCTSTR path = _T("C:\\"); // TODO + + s_gpu->MakeSnapshot(path); +} + +EXPORT_C GPUdisplayText(char* text) +{ + // TODO +} + +EXPORT_C GPUdisplayFlags(UINT32 flags) +{ + // TODO +} + +EXPORT_C_(INT32) GPUfreeze(UINT32 type, GPUFreezeData* data) +{ + if(!data || data->version != 1) + { + return 0; + } + + if(type == 0) + { + s_gpu->Defrost(data); + + return 1; + } + else if(type == 1) + { + s_gpu->Freeze(data); + + return 1; + } + else if(type == 2) + { + int slot = *(int*)data + 1; + + if(slot < 1 || slot > 9) + { + return 0; + } + + // TODO + + return 1; + } + + return 0; +} + +EXPORT_C GPUgetScreenPic(BYTE* mem) +{ + // TODO +} + +EXPORT_C GPUshowScreenPic(BYTE* mem) +{ + // TODO +} + +EXPORT_C GPUcursor(int player, int x, int y) +{ + // TODO +} \ No newline at end of file diff --git a/gsdx/GPU.h b/gsdx/GPU.h new file mode 100644 index 0000000..af3da71 --- /dev/null +++ b/gsdx/GPU.h @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#pragma pack(push, 1) + +#include "GS.h" + +enum +{ + GPU_POLYGON = 1, + GPU_LINE = 2, + GPU_SPRITE = 3, +}; + +REG32_(GPUReg, STATUS) + UINT32 TX:4; + UINT32 TY:1; + UINT32 ABR:2; + UINT32 TP:2; + UINT32 DTD:1; + UINT32 DFE:1; + UINT32 MD:1; + UINT32 ME:1; + UINT32 _PAD0:3; + UINT32 WIDTH1:1; + UINT32 WIDTH0:2; + UINT32 HEIGHT:1; + UINT32 ISPAL:1; + UINT32 ISRGB24:1; + UINT32 ISINTER:1; + UINT32 DEN:1; + UINT32 _PAD1:2; + UINT32 IDLE:1; + UINT32 IMG:1; + UINT32 COM:1; + UINT32 DMA:2; + UINT32 LCF:1; + /* + UINT32 TX:4; + UINT32 TY:1; + UINT32 ABR:2; + UINT32 TP:2; + UINT32 DTD:1; + UINT32 DFE:1; + UINT32 PBW:1; + UINT32 PBC:1; + UINT32 _PAD0:3; + UINT32 HRES2:1; + UINT32 HRES1:2; + UINT32 VRES:1; + UINT32 ISPAL:1; + UINT32 ISRGB24:1; + UINT32 ISINTER:1; + UINT32 ISSTOP:1; + UINT32 _PAD1:1; + UINT32 DMARDY:1; + UINT32 IDIDLE:1; + UINT32 DATARDY:1; + UINT32 ISEMPTY:1; + UINT32 TMODE:2; + UINT32 ODE:1; + */ +REG_END + +REG32_(GPUReg, PACKET) + UINT32 _PAD:24; + UINT32 OPTION:5; + UINT32 TYPE:3; +REG_END + +REG32_(GPUReg, PRIM) + UINT32 VTX:24; + UINT32 TGE:1; + UINT32 ABE:1; + UINT32 TME:1; + UINT32 _PAD2:1; + UINT32 IIP:1; + UINT32 TYPE:3; +REG_END + +REG32_(GPUReg, POLYGON) + UINT32 _PAD:24; + UINT32 TGE:1; + UINT32 ABE:1; + UINT32 TME:1; + UINT32 VTX:1; + UINT32 IIP:1; + UINT32 TYPE:3; +REG_END + +REG32_(GPUReg, LINE) + UINT32 _PAD:24; + UINT32 ZERO1:1; + UINT32 ABE:1; + UINT32 ZERO2:1; + UINT32 PLL:1; + UINT32 IIP:1; + UINT32 TYPE:3; 
+REG_END + +REG32_(GPUReg, SPRITE) + UINT32 _PAD:24; + UINT32 ZERO:1; + UINT32 ABE:1; + UINT32 TME:1; + UINT32 SIZE:2; + UINT32 TYPE:3; +REG_END + +REG32_(GPUReg, RESET) + UINT32 _PAD:32; +REG_END + +REG32_(GPUReg, DEN) + UINT32 DEN:1; + UINT32 _PAD:31; +REG_END + +REG32_(GPUReg, DMA) + UINT32 DMA:2; + UINT32 _PAD:30; +REG_END + +REG32_(GPUReg, DAREA) + UINT32 X:10; + UINT32 Y:9; + UINT32 _PAD:13; +REG_END + +REG32_(GPUReg, DHRANGE) + UINT32 X1:12; + UINT32 X2:12; + UINT32 _PAD:8; +REG_END + +REG32_(GPUReg, DVRANGE) + UINT32 Y1:10; + UINT32 Y2:11; + UINT32 _PAD:11; +REG_END + +REG32_(GPUReg, DMODE) + UINT32 WIDTH0:2; + UINT32 HEIGHT:1; + UINT32 ISPAL:1; + UINT32 ISRGB24:1; + UINT32 ISINTER:1; + UINT32 WIDTH1:1; + UINT32 REVERSE:1; + UINT32 _PAD:24; +REG_END + +REG32_(GPUReg, GPUINFO) + UINT32 PARAM:24; + UINT32 _PAD:8; +REG_END + +REG32_(GPUReg, MODE) + UINT32 TX:4; + UINT32 TY:1; + UINT32 ABR:2; + UINT32 TP:2; + UINT32 DTD:1; + UINT32 DFE:1; + UINT32 _PAD:21; +REG_END + +REG32_(GPUReg, MASK) + UINT32 MD:1; + UINT32 ME:1; + UINT32 _PAD:30; +REG_END + +REG32_(GPUReg, DRAREA) + UINT32 X:10; + UINT32 Y:10; + UINT32 _PAD:12; +REG_END + +REG32_(GPUReg, DROFF) + INT32 X:11; + INT32 Y:11; + INT32 _PAD:10; +REG_END + +REG32_(GPUReg, RGB) + UINT32 R:8; + UINT32 G:8; + UINT32 B:8; + UINT32 _PAD:8; +REG_END + +REG32_(GPUReg, XY) + INT32 X:16; + INT32 Y:16; +REG_END + +REG32_(GPUReg, UV) + UINT32 U:8; + UINT32 V:8; + UINT32 _PAD:16; +REG_END + +REG32_(GPUReg, TWIN) + UINT32 TWW:5; + UINT32 TWH:5; + UINT32 TWX:5; + UINT32 TWY:5; + UINT32 _PAD:12; +REG_END + +REG32_(GPUReg, CLUT) + UINT32 _PAD1:16; + UINT32 X:6; + UINT32 Y:9; + UINT32 _PAD2:1; +REG_END + +REG32_SET(GPUReg) + GPURegSTATUS STATUS; + GPURegPACKET PACKET; + GPURegPRIM PRIM; + GPURegPOLYGON POLYGON; + GPURegLINE LINE; + GPURegSPRITE SPRITE; + GPURegRESET RESET; + GPURegDEN DEN; + GPURegDMA DMA; + GPURegDAREA DAREA; + GPURegDHRANGE DHRANGE; + GPURegDVRANGE DVRANGE; + GPURegDMODE DMODE; + GPURegGPUINFO GPUINFO; + 
GPURegMODE MODE; + GPURegMASK MASK; + GPURegDRAREA DRAREA; + GPURegDROFF DROFF; + GPURegRGB RGB; + GPURegXY XY; + GPURegUV UV; + GPURegTWIN TWIN; + GPURegCLUT CLUT; +REG_SET_END + +struct GPUFreezeData +{ + UINT32 version; // == 1 + UINT32 status; + UINT32 control[256]; + UINT16 vram[1024 * 1024]; +}; + +#pragma pack(pop) + diff --git a/gsdx/GPUDrawingEnvironment.h b/gsdx/GPUDrawingEnvironment.h new file mode 100644 index 0000000..a9d8d0b --- /dev/null +++ b/gsdx/GPUDrawingEnvironment.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPU.h" + +#pragma pack(push, 1) + +__declspec(align(16)) class GPUDrawingEnvironment +{ +public: + GPURegSTATUS STATUS; + GPURegPRIM PRIM; + GPURegDAREA DAREA; + GPURegDHRANGE DHRANGE; + GPURegDVRANGE DVRANGE; + GPURegDRAREA DRAREATL; + GPURegDRAREA DRAREABR; + GPURegDROFF DROFF; + GPURegTWIN TWIN; + GPURegCLUT CLUT; + + GPUDrawingEnvironment() + { + Reset(); + } + + void Reset() + { + memset(this, 0, sizeof(*this)); + + STATUS.IDLE = 1; + STATUS.COM = 1; + } + + CRect GetDisplayRect() + { + static int s_width[] = {256, 320, 512, 640, 384, 320, 320, 320}; + static int s_height[] = {240, 480}; + + CRect r; + + r.left = DAREA.X & ~7; + r.top = DAREA.Y; + r.right = r.left + s_width[(STATUS.WIDTH1 << 2) | STATUS.WIDTH0]; + r.bottom = r.top + s_height[STATUS.HEIGHT]; + + r &= CRect(0, 0, 1024, 512); + + return r; + } + + int GetFPS() + { + return STATUS.ISPAL ? 50 : 60; + } +}; + +#pragma pack(pop) diff --git a/gsdx/GPULocalMemory.cpp b/gsdx/GPULocalMemory.cpp new file mode 100644 index 0000000..03f9aa5 --- /dev/null +++ b/gsdx/GPULocalMemory.cpp @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GPULocalMemory.h" + +const GSVector4i GPULocalMemory::m_xxxa(0x00008000); +const GSVector4i GPULocalMemory::m_xxbx(0x00007c00); +const GSVector4i GPULocalMemory::m_xgxx(0x000003e0); +const GSVector4i GPULocalMemory::m_rxxx(0x0000001f); + +static void CheckRect(const CRect& r) +{ + ASSERT(r.left >= 0 && r.left <= 1024); + ASSERT(r.right >= 0 && r.right <= 1024); + ASSERT(r.top >= 0 && r.top <= 512); + ASSERT(r.bottom >= 0 && r.bottom <= 512); + ASSERT(r.left <= r.right); + ASSERT(r.top <= r.bottom); +} + +GPULocalMemory::GPULocalMemory() +{ + m_vm8 = (BYTE*)VirtualAlloc(NULL, m_vmsize * 2, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + + memset(m_vm8, 0, m_vmsize); +} + +GPULocalMemory::~GPULocalMemory() +{ + VirtualFree(m_vm8, 0, MEM_RELEASE); +} + +void GPULocalMemory::FillRect(const CRect& r, WORD c) +{ + CheckRect(r); + + CRect clip; + + clip.left = (r.left + 7) & ~7; + clip.top = r.top; + clip.right = r.right & ~7; + clip.bottom = r.bottom; + + for(int y = r.top; y < clip.top; y++) + { + WORD* p = &m_vm16[y << 10]; + + for(int x = r.left; x < r.right; x++) + { + p[x] = c; + } + } + + for(int y = clip.bottom; y < r.bottom; y++) + { + WORD* p = &m_vm16[y << 10]; + + for(int x = r.left; x < r.right; x++) + { + p[x] = c; + } + } + + if(r.left < clip.left || clip.right < r.right) + { + for(int y = clip.top; y < clip.bottom; y++) + { + WORD* p = &m_vm16[y << 10]; + + for(int x = r.left; x < clip.left; x++) + { + p[x] = c; + } + + for(int x = clip.right; x < r.right; x++) + { + p[x] = c; + } + } + } + + GSVector4i c128((c << 16) | c); + + for(int y = clip.top; y < clip.bottom; y++) + { + GSVector4i* p = (GSVector4i*)&m_vm16[(y << 10) + clip.left]; + + for(int i = 0, n = (clip.right - clip.left) >> 3; i < n; i++) + { + p[i] = c128; + } + } +} + +void GPULocalMemory::WriteRect(const CRect& r, const WORD* c) +{ + CheckRect(r); + + int w = r.Width(); + + for(int y = r.top; y < r.bottom; y++) 
+ { + WORD* p = &m_vm16[y << 10]; + + memcpy(&p[r.left], c, w * 2); + + c += w; + } +} + +void GPULocalMemory::ReadRect(const CRect& r, WORD* c) +{ + CheckRect(r); + + int w = r.Width(); + + for(int y = r.top; y < r.bottom; y++) + { + WORD* p = &m_vm16[y << 10]; + + memcpy(c, &p[r.left], w * 2); + + c += w; + } +} + +void GPULocalMemory::MoveRect(const CPoint& src, const CPoint& dst, int w, int h) +{ + CheckRect(CRect(src, CSize(w, h))); + CheckRect(CRect(dst, CSize(w, h))); + + WORD* s = &m_vm16[(src.y << 10) + src.x]; + WORD* d = &m_vm16[(dst.y << 10) + dst.x]; + + for(int i = 0; i < h; i++, s += 1024, d += 1024) + { + memcpy(d, s, w * 2); + } +} + +void GPULocalMemory::ReadPage4(int tx, int ty, BYTE* dst) +{ + GSVector4i mask(0x0f0f0f0f); + + WORD* src = &m_vm16[(ty << 18) + (tx << 6)]; + + for(int j = 0; j < 256; j++, src += 1024, dst += 256) + { + GSVector4i* s = (GSVector4i*)src; + GSVector4i* d = (GSVector4i*)dst; + + for(int i = 0; i < 8; i++) + { + GSVector4i c = s[i]; + + GSVector4i l = c & mask; + GSVector4i h = c.andnot(mask) >> 4; + + d[i * 2 + 0] = l.upl8(h); + d[i * 2 + 1] = l.uph8(h); + } + } +} + +void GPULocalMemory::ReadPage8(int tx, int ty, BYTE* dst) +{ + WORD* src = &m_vm16[(ty << 18) + (tx << 6)]; + + for(int j = 0; j < 256; j++, src += 1024, dst += 256) + { + memcpy(dst, src, 256); + } +} + +void GPULocalMemory::ReadPage16(int tx, int ty, WORD* dst) +{ + WORD* src = &m_vm16[(ty << 18) + (tx << 6)]; + + for(int j = 0; j < 256; j++, src += 1024, dst += 256) + { + memcpy(dst, src, 256 * sizeof(WORD)); + } +} + +void GPULocalMemory::ReadPalette4(int cx, int cy, WORD* dst) +{ + memcpy(dst, &m_vm16[(cy << 10) + (cx << 4)], 16 * sizeof(WORD)); +} + +void GPULocalMemory::ReadPalette8(int cx, int cy, WORD* dst) +{ + memcpy(dst, &m_vm16[(cy << 10) + (cx << 4)], 256 * sizeof(WORD)); +} + +void GPULocalMemory::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels) +{ + GSVector4i rm = m_rxxx; + GSVector4i gm = m_xgxx; + GSVector4i bm = 
m_xxbx; + GSVector4i am = m_xxxa; + + GSVector4i* s = (GSVector4i*)src; + GSVector4i* d = (GSVector4i*)dst; + + for(int i = 0, j = pixels >> 3; i < j; i++) + { + GSVector4i c = s[i]; + + GSVector4i l = c.upl16(); + GSVector4i h = c.uph16(); + + d[i * 2 + 0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | ((l & am) << 16); + d[i * 2 + 1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | ((h & am) << 16); + } +} + +void GPULocalMemory::Expand24(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels) +{ + // TODO: sse + + BYTE* s = (BYTE*)src; + + for(int i = 0; i < pixels; i++, s += 3) + { + dst[i] = (s[2] << 16) | (s[1] << 8) | s[0]; + } +} diff --git a/gsdx/GPULocalMemory.h b/gsdx/GPULocalMemory.h new file mode 100644 index 0000000..99f5865 --- /dev/null +++ b/gsdx/GPULocalMemory.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPU.h" +#include "GSVector.h" + +class GPULocalMemory +{ + static const GSVector4i m_xxxa; + static const GSVector4i m_xxbx; + static const GSVector4i m_xgxx; + static const GSVector4i m_rxxx; + +public: + static const int m_vmsize = 1024 * 1024; + + union {BYTE* m_vm8; WORD* m_vm16; DWORD* m_vm32;}; + +public: + GPULocalMemory(); + virtual ~GPULocalMemory(); + + void FillRect(const CRect& r, WORD c); + void WriteRect(const CRect& r, const WORD* c); + void ReadRect(const CRect& r, WORD* c); + void MoveRect(const CPoint& src, const CPoint& dst, int w, int h); + + void ReadPage4(int tx, int ty, BYTE* dst); + void ReadPage8(int tx, int ty, BYTE* dst); + void ReadPage16(int tx, int ty, WORD* dst); + + void ReadPalette4(int cx, int cy, WORD* dst); + void ReadPalette8(int cx, int cy, WORD* dst); + + static void Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels); + static void Expand24(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels); +}; + +#pragma warning(default: 4244) \ No newline at end of file diff --git a/gsdx/GPURasterizer.cpp b/gsdx/GPURasterizer.cpp new file mode 100644 index 0000000..d852f26 --- /dev/null +++ b/gsdx/GPURasterizer.cpp @@ -0,0 +1,1487 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. 
If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GPURasterizer.h" + +GPURasterizer::GPURasterizer(GPUState* state, int id, int threads) + : m_state(state) + , m_id(id) + , m_threads(threads) +{ + memset(m_ds, 0, sizeof(m_ds)); + + for(int i = 0; i < countof(m_ds); i++) + { + m_ds[i] = &GPURasterizer::DrawScanline; + } + + /* + m_ds[0x00] = &GPURasterizer::DrawScanlineEx<0x00>; + m_ds[0x01] = &GPURasterizer::DrawScanlineEx<0x01>; + m_ds[0x02] = &GPURasterizer::DrawScanlineEx<0x02>; + m_ds[0x03] = &GPURasterizer::DrawScanlineEx<0x03>; + m_ds[0x04] = &GPURasterizer::DrawScanlineEx<0x04>; + m_ds[0x05] = &GPURasterizer::DrawScanlineEx<0x05>; + m_ds[0x06] = &GPURasterizer::DrawScanlineEx<0x06>; + m_ds[0x07] = &GPURasterizer::DrawScanlineEx<0x07>; + m_ds[0x08] = &GPURasterizer::DrawScanlineEx<0x08>; + m_ds[0x09] = &GPURasterizer::DrawScanlineEx<0x09>; + m_ds[0x0a] = &GPURasterizer::DrawScanlineEx<0x0a>; + m_ds[0x0b] = &GPURasterizer::DrawScanlineEx<0x0b>; + m_ds[0x0c] = &GPURasterizer::DrawScanlineEx<0x0c>; + m_ds[0x0d] = &GPURasterizer::DrawScanlineEx<0x0d>; + m_ds[0x0e] = &GPURasterizer::DrawScanlineEx<0x0e>; + m_ds[0x0f] = &GPURasterizer::DrawScanlineEx<0x0f>; + m_ds[0x10] = &GPURasterizer::DrawScanlineEx<0x10>; + m_ds[0x11] = &GPURasterizer::DrawScanlineEx<0x11>; + m_ds[0x12] = &GPURasterizer::DrawScanlineEx<0x12>; + m_ds[0x13] = &GPURasterizer::DrawScanlineEx<0x13>; + m_ds[0x14] = &GPURasterizer::DrawScanlineEx<0x14>; + m_ds[0x15] = &GPURasterizer::DrawScanlineEx<0x15>; + m_ds[0x16] = &GPURasterizer::DrawScanlineEx<0x16>; + m_ds[0x17] = &GPURasterizer::DrawScanlineEx<0x17>; + m_ds[0x18] = &GPURasterizer::DrawScanlineEx<0x18>; + m_ds[0x19] = &GPURasterizer::DrawScanlineEx<0x19>; + m_ds[0x1a] = &GPURasterizer::DrawScanlineEx<0x1a>; + m_ds[0x1b] = &GPURasterizer::DrawScanlineEx<0x1b>; + m_ds[0x1c] = &GPURasterizer::DrawScanlineEx<0x1c>; + 
m_ds[0x1d] = &GPURasterizer::DrawScanlineEx<0x1d>; + m_ds[0x1e] = &GPURasterizer::DrawScanlineEx<0x1e>; + m_ds[0x1f] = &GPURasterizer::DrawScanlineEx<0x1f>; + m_ds[0x20] = &GPURasterizer::DrawScanlineEx<0x20>; + m_ds[0x21] = &GPURasterizer::DrawScanlineEx<0x21>; + m_ds[0x22] = &GPURasterizer::DrawScanlineEx<0x22>; + m_ds[0x23] = &GPURasterizer::DrawScanlineEx<0x23>; + m_ds[0x24] = &GPURasterizer::DrawScanlineEx<0x24>; + m_ds[0x25] = &GPURasterizer::DrawScanlineEx<0x25>; + m_ds[0x26] = &GPURasterizer::DrawScanlineEx<0x26>; + m_ds[0x27] = &GPURasterizer::DrawScanlineEx<0x27>; + m_ds[0x28] = &GPURasterizer::DrawScanlineEx<0x28>; + m_ds[0x29] = &GPURasterizer::DrawScanlineEx<0x29>; + m_ds[0x2a] = &GPURasterizer::DrawScanlineEx<0x2a>; + m_ds[0x2b] = &GPURasterizer::DrawScanlineEx<0x2b>; + m_ds[0x2c] = &GPURasterizer::DrawScanlineEx<0x2c>; + m_ds[0x2d] = &GPURasterizer::DrawScanlineEx<0x2d>; + m_ds[0x2e] = &GPURasterizer::DrawScanlineEx<0x2e>; + m_ds[0x2f] = &GPURasterizer::DrawScanlineEx<0x2f>; + m_ds[0x30] = &GPURasterizer::DrawScanlineEx<0x30>; + m_ds[0x31] = &GPURasterizer::DrawScanlineEx<0x31>; + m_ds[0x32] = &GPURasterizer::DrawScanlineEx<0x32>; + m_ds[0x33] = &GPURasterizer::DrawScanlineEx<0x33>; + m_ds[0x34] = &GPURasterizer::DrawScanlineEx<0x34>; + m_ds[0x35] = &GPURasterizer::DrawScanlineEx<0x35>; + m_ds[0x36] = &GPURasterizer::DrawScanlineEx<0x36>; + m_ds[0x37] = &GPURasterizer::DrawScanlineEx<0x37>; + m_ds[0x38] = &GPURasterizer::DrawScanlineEx<0x38>; + m_ds[0x39] = &GPURasterizer::DrawScanlineEx<0x39>; + m_ds[0x3a] = &GPURasterizer::DrawScanlineEx<0x3a>; + m_ds[0x3b] = &GPURasterizer::DrawScanlineEx<0x3b>; + m_ds[0x3c] = &GPURasterizer::DrawScanlineEx<0x3c>; + m_ds[0x3d] = &GPURasterizer::DrawScanlineEx<0x3d>; + m_ds[0x3e] = &GPURasterizer::DrawScanlineEx<0x3e>; + m_ds[0x3f] = &GPURasterizer::DrawScanlineEx<0x3f>; + m_ds[0x40] = &GPURasterizer::DrawScanlineEx<0x40>; + m_ds[0x41] = &GPURasterizer::DrawScanlineEx<0x41>; + m_ds[0x42] = 
&GPURasterizer::DrawScanlineEx<0x42>; + m_ds[0x43] = &GPURasterizer::DrawScanlineEx<0x43>; + m_ds[0x44] = &GPURasterizer::DrawScanlineEx<0x44>; + m_ds[0x45] = &GPURasterizer::DrawScanlineEx<0x45>; + m_ds[0x46] = &GPURasterizer::DrawScanlineEx<0x46>; + m_ds[0x47] = &GPURasterizer::DrawScanlineEx<0x47>; + m_ds[0x48] = &GPURasterizer::DrawScanlineEx<0x48>; + m_ds[0x49] = &GPURasterizer::DrawScanlineEx<0x49>; + m_ds[0x4a] = &GPURasterizer::DrawScanlineEx<0x4a>; + m_ds[0x4b] = &GPURasterizer::DrawScanlineEx<0x4b>; + m_ds[0x4c] = &GPURasterizer::DrawScanlineEx<0x4c>; + m_ds[0x4d] = &GPURasterizer::DrawScanlineEx<0x4d>; + m_ds[0x4e] = &GPURasterizer::DrawScanlineEx<0x4e>; + m_ds[0x4f] = &GPURasterizer::DrawScanlineEx<0x4f>; + m_ds[0x50] = &GPURasterizer::DrawScanlineEx<0x50>; + m_ds[0x51] = &GPURasterizer::DrawScanlineEx<0x51>; + m_ds[0x52] = &GPURasterizer::DrawScanlineEx<0x52>; + m_ds[0x53] = &GPURasterizer::DrawScanlineEx<0x53>; + m_ds[0x54] = &GPURasterizer::DrawScanlineEx<0x54>; + m_ds[0x55] = &GPURasterizer::DrawScanlineEx<0x55>; + m_ds[0x56] = &GPURasterizer::DrawScanlineEx<0x56>; + m_ds[0x57] = &GPURasterizer::DrawScanlineEx<0x57>; + m_ds[0x58] = &GPURasterizer::DrawScanlineEx<0x58>; + m_ds[0x59] = &GPURasterizer::DrawScanlineEx<0x59>; + m_ds[0x5a] = &GPURasterizer::DrawScanlineEx<0x5a>; + m_ds[0x5b] = &GPURasterizer::DrawScanlineEx<0x5b>; + m_ds[0x5c] = &GPURasterizer::DrawScanlineEx<0x5c>; + m_ds[0x5d] = &GPURasterizer::DrawScanlineEx<0x5d>; + m_ds[0x5e] = &GPURasterizer::DrawScanlineEx<0x5e>; + m_ds[0x5f] = &GPURasterizer::DrawScanlineEx<0x5f>; + m_ds[0x60] = &GPURasterizer::DrawScanlineEx<0x60>; + m_ds[0x61] = &GPURasterizer::DrawScanlineEx<0x61>; + m_ds[0x62] = &GPURasterizer::DrawScanlineEx<0x62>; + m_ds[0x63] = &GPURasterizer::DrawScanlineEx<0x63>; + m_ds[0x64] = &GPURasterizer::DrawScanlineEx<0x64>; + m_ds[0x65] = &GPURasterizer::DrawScanlineEx<0x65>; + m_ds[0x66] = &GPURasterizer::DrawScanlineEx<0x66>; + m_ds[0x67] = &GPURasterizer::DrawScanlineEx<0x67>; 
+ m_ds[0x68] = &GPURasterizer::DrawScanlineEx<0x68>; + m_ds[0x69] = &GPURasterizer::DrawScanlineEx<0x69>; + m_ds[0x6a] = &GPURasterizer::DrawScanlineEx<0x6a>; + m_ds[0x6b] = &GPURasterizer::DrawScanlineEx<0x6b>; + m_ds[0x6c] = &GPURasterizer::DrawScanlineEx<0x6c>; + m_ds[0x6d] = &GPURasterizer::DrawScanlineEx<0x6d>; + m_ds[0x6e] = &GPURasterizer::DrawScanlineEx<0x6e>; + m_ds[0x6f] = &GPURasterizer::DrawScanlineEx<0x6f>; + m_ds[0x70] = &GPURasterizer::DrawScanlineEx<0x70>; + m_ds[0x71] = &GPURasterizer::DrawScanlineEx<0x71>; + m_ds[0x72] = &GPURasterizer::DrawScanlineEx<0x72>; + m_ds[0x73] = &GPURasterizer::DrawScanlineEx<0x73>; + m_ds[0x74] = &GPURasterizer::DrawScanlineEx<0x74>; + m_ds[0x75] = &GPURasterizer::DrawScanlineEx<0x75>; + m_ds[0x76] = &GPURasterizer::DrawScanlineEx<0x76>; + m_ds[0x77] = &GPURasterizer::DrawScanlineEx<0x77>; + m_ds[0x78] = &GPURasterizer::DrawScanlineEx<0x78>; + m_ds[0x79] = &GPURasterizer::DrawScanlineEx<0x79>; + m_ds[0x7a] = &GPURasterizer::DrawScanlineEx<0x7a>; + m_ds[0x7b] = &GPURasterizer::DrawScanlineEx<0x7b>; + m_ds[0x7c] = &GPURasterizer::DrawScanlineEx<0x7c>; + m_ds[0x7d] = &GPURasterizer::DrawScanlineEx<0x7d>; + m_ds[0x7e] = &GPURasterizer::DrawScanlineEx<0x7e>; + m_ds[0x7f] = &GPURasterizer::DrawScanlineEx<0x7f>; + m_ds[0x80] = &GPURasterizer::DrawScanlineEx<0x80>; + m_ds[0x81] = &GPURasterizer::DrawScanlineEx<0x81>; + m_ds[0x82] = &GPURasterizer::DrawScanlineEx<0x82>; + m_ds[0x83] = &GPURasterizer::DrawScanlineEx<0x83>; + m_ds[0x84] = &GPURasterizer::DrawScanlineEx<0x84>; + m_ds[0x85] = &GPURasterizer::DrawScanlineEx<0x85>; + m_ds[0x86] = &GPURasterizer::DrawScanlineEx<0x86>; + m_ds[0x87] = &GPURasterizer::DrawScanlineEx<0x87>; + m_ds[0x88] = &GPURasterizer::DrawScanlineEx<0x88>; + m_ds[0x89] = &GPURasterizer::DrawScanlineEx<0x89>; + m_ds[0x8a] = &GPURasterizer::DrawScanlineEx<0x8a>; + m_ds[0x8b] = &GPURasterizer::DrawScanlineEx<0x8b>; + m_ds[0x8c] = &GPURasterizer::DrawScanlineEx<0x8c>; + m_ds[0x8d] = 
&GPURasterizer::DrawScanlineEx<0x8d>; + m_ds[0x8e] = &GPURasterizer::DrawScanlineEx<0x8e>; + m_ds[0x8f] = &GPURasterizer::DrawScanlineEx<0x8f>; + m_ds[0x90] = &GPURasterizer::DrawScanlineEx<0x90>; + m_ds[0x91] = &GPURasterizer::DrawScanlineEx<0x91>; + m_ds[0x92] = &GPURasterizer::DrawScanlineEx<0x92>; + m_ds[0x93] = &GPURasterizer::DrawScanlineEx<0x93>; + m_ds[0x94] = &GPURasterizer::DrawScanlineEx<0x94>; + m_ds[0x95] = &GPURasterizer::DrawScanlineEx<0x95>; + m_ds[0x96] = &GPURasterizer::DrawScanlineEx<0x96>; + m_ds[0x97] = &GPURasterizer::DrawScanlineEx<0x97>; + m_ds[0x98] = &GPURasterizer::DrawScanlineEx<0x98>; + m_ds[0x99] = &GPURasterizer::DrawScanlineEx<0x99>; + m_ds[0x9a] = &GPURasterizer::DrawScanlineEx<0x9a>; + m_ds[0x9b] = &GPURasterizer::DrawScanlineEx<0x9b>; + m_ds[0x9c] = &GPURasterizer::DrawScanlineEx<0x9c>; + m_ds[0x9d] = &GPURasterizer::DrawScanlineEx<0x9d>; + m_ds[0x9e] = &GPURasterizer::DrawScanlineEx<0x9e>; + m_ds[0x9f] = &GPURasterizer::DrawScanlineEx<0x9f>; + m_ds[0xa0] = &GPURasterizer::DrawScanlineEx<0xa0>; + m_ds[0xa1] = &GPURasterizer::DrawScanlineEx<0xa1>; + m_ds[0xa2] = &GPURasterizer::DrawScanlineEx<0xa2>; + m_ds[0xa3] = &GPURasterizer::DrawScanlineEx<0xa3>; + m_ds[0xa4] = &GPURasterizer::DrawScanlineEx<0xa4>; + m_ds[0xa5] = &GPURasterizer::DrawScanlineEx<0xa5>; + m_ds[0xa6] = &GPURasterizer::DrawScanlineEx<0xa6>; + m_ds[0xa7] = &GPURasterizer::DrawScanlineEx<0xa7>; + m_ds[0xa8] = &GPURasterizer::DrawScanlineEx<0xa8>; + m_ds[0xa9] = &GPURasterizer::DrawScanlineEx<0xa9>; + m_ds[0xaa] = &GPURasterizer::DrawScanlineEx<0xaa>; + m_ds[0xab] = &GPURasterizer::DrawScanlineEx<0xab>; + m_ds[0xac] = &GPURasterizer::DrawScanlineEx<0xac>; + m_ds[0xad] = &GPURasterizer::DrawScanlineEx<0xad>; + m_ds[0xae] = &GPURasterizer::DrawScanlineEx<0xae>; + m_ds[0xaf] = &GPURasterizer::DrawScanlineEx<0xaf>; + m_ds[0xb0] = &GPURasterizer::DrawScanlineEx<0xb0>; + m_ds[0xb1] = &GPURasterizer::DrawScanlineEx<0xb1>; + m_ds[0xb2] = &GPURasterizer::DrawScanlineEx<0xb2>; 
+ m_ds[0xb3] = &GPURasterizer::DrawScanlineEx<0xb3>; + m_ds[0xb4] = &GPURasterizer::DrawScanlineEx<0xb4>; + m_ds[0xb5] = &GPURasterizer::DrawScanlineEx<0xb5>; + m_ds[0xb6] = &GPURasterizer::DrawScanlineEx<0xb6>; + m_ds[0xb7] = &GPURasterizer::DrawScanlineEx<0xb7>; + m_ds[0xb8] = &GPURasterizer::DrawScanlineEx<0xb8>; + m_ds[0xb9] = &GPURasterizer::DrawScanlineEx<0xb9>; + m_ds[0xba] = &GPURasterizer::DrawScanlineEx<0xba>; + m_ds[0xbb] = &GPURasterizer::DrawScanlineEx<0xbb>; + m_ds[0xbc] = &GPURasterizer::DrawScanlineEx<0xbc>; + m_ds[0xbd] = &GPURasterizer::DrawScanlineEx<0xbd>; + m_ds[0xbe] = &GPURasterizer::DrawScanlineEx<0xbe>; + m_ds[0xbf] = &GPURasterizer::DrawScanlineEx<0xbf>; + m_ds[0xc0] = &GPURasterizer::DrawScanlineEx<0xc0>; + m_ds[0xc1] = &GPURasterizer::DrawScanlineEx<0xc1>; + m_ds[0xc2] = &GPURasterizer::DrawScanlineEx<0xc2>; + m_ds[0xc3] = &GPURasterizer::DrawScanlineEx<0xc3>; + m_ds[0xc4] = &GPURasterizer::DrawScanlineEx<0xc4>; + m_ds[0xc5] = &GPURasterizer::DrawScanlineEx<0xc5>; + m_ds[0xc6] = &GPURasterizer::DrawScanlineEx<0xc6>; + m_ds[0xc7] = &GPURasterizer::DrawScanlineEx<0xc7>; + m_ds[0xc8] = &GPURasterizer::DrawScanlineEx<0xc8>; + m_ds[0xc9] = &GPURasterizer::DrawScanlineEx<0xc9>; + m_ds[0xca] = &GPURasterizer::DrawScanlineEx<0xca>; + m_ds[0xcb] = &GPURasterizer::DrawScanlineEx<0xcb>; + m_ds[0xcc] = &GPURasterizer::DrawScanlineEx<0xcc>; + m_ds[0xcd] = &GPURasterizer::DrawScanlineEx<0xcd>; + m_ds[0xce] = &GPURasterizer::DrawScanlineEx<0xce>; + m_ds[0xcf] = &GPURasterizer::DrawScanlineEx<0xcf>; + m_ds[0xd0] = &GPURasterizer::DrawScanlineEx<0xd0>; + m_ds[0xd1] = &GPURasterizer::DrawScanlineEx<0xd1>; + m_ds[0xd2] = &GPURasterizer::DrawScanlineEx<0xd2>; + m_ds[0xd3] = &GPURasterizer::DrawScanlineEx<0xd3>; + m_ds[0xd4] = &GPURasterizer::DrawScanlineEx<0xd4>; + m_ds[0xd5] = &GPURasterizer::DrawScanlineEx<0xd5>; + m_ds[0xd6] = &GPURasterizer::DrawScanlineEx<0xd6>; + m_ds[0xd7] = &GPURasterizer::DrawScanlineEx<0xd7>; + m_ds[0xd8] = 
&GPURasterizer::DrawScanlineEx<0xd8>; + m_ds[0xd9] = &GPURasterizer::DrawScanlineEx<0xd9>; + m_ds[0xda] = &GPURasterizer::DrawScanlineEx<0xda>; + m_ds[0xdb] = &GPURasterizer::DrawScanlineEx<0xdb>; + m_ds[0xdc] = &GPURasterizer::DrawScanlineEx<0xdc>; + m_ds[0xdd] = &GPURasterizer::DrawScanlineEx<0xdd>; + m_ds[0xde] = &GPURasterizer::DrawScanlineEx<0xde>; + m_ds[0xdf] = &GPURasterizer::DrawScanlineEx<0xdf>; + m_ds[0xe0] = &GPURasterizer::DrawScanlineEx<0xe0>; + m_ds[0xe1] = &GPURasterizer::DrawScanlineEx<0xe1>; + m_ds[0xe2] = &GPURasterizer::DrawScanlineEx<0xe2>; + m_ds[0xe3] = &GPURasterizer::DrawScanlineEx<0xe3>; + m_ds[0xe4] = &GPURasterizer::DrawScanlineEx<0xe4>; + m_ds[0xe5] = &GPURasterizer::DrawScanlineEx<0xe5>; + m_ds[0xe6] = &GPURasterizer::DrawScanlineEx<0xe6>; + m_ds[0xe7] = &GPURasterizer::DrawScanlineEx<0xe7>; + m_ds[0xe8] = &GPURasterizer::DrawScanlineEx<0xe8>; + m_ds[0xe9] = &GPURasterizer::DrawScanlineEx<0xe9>; + m_ds[0xea] = &GPURasterizer::DrawScanlineEx<0xea>; + m_ds[0xeb] = &GPURasterizer::DrawScanlineEx<0xeb>; + m_ds[0xec] = &GPURasterizer::DrawScanlineEx<0xec>; + m_ds[0xed] = &GPURasterizer::DrawScanlineEx<0xed>; + m_ds[0xee] = &GPURasterizer::DrawScanlineEx<0xee>; + m_ds[0xef] = &GPURasterizer::DrawScanlineEx<0xef>; + m_ds[0xf0] = &GPURasterizer::DrawScanlineEx<0xf0>; + m_ds[0xf1] = &GPURasterizer::DrawScanlineEx<0xf1>; + m_ds[0xf2] = &GPURasterizer::DrawScanlineEx<0xf2>; + m_ds[0xf3] = &GPURasterizer::DrawScanlineEx<0xf3>; + m_ds[0xf4] = &GPURasterizer::DrawScanlineEx<0xf4>; + m_ds[0xf5] = &GPURasterizer::DrawScanlineEx<0xf5>; + m_ds[0xf6] = &GPURasterizer::DrawScanlineEx<0xf6>; + m_ds[0xf7] = &GPURasterizer::DrawScanlineEx<0xf7>; + m_ds[0xf8] = &GPURasterizer::DrawScanlineEx<0xf8>; + m_ds[0xf9] = &GPURasterizer::DrawScanlineEx<0xf9>; + m_ds[0xfa] = &GPURasterizer::DrawScanlineEx<0xfa>; + m_ds[0xfb] = &GPURasterizer::DrawScanlineEx<0xfb>; + m_ds[0xfc] = &GPURasterizer::DrawScanlineEx<0xfc>; + m_ds[0xfd] = &GPURasterizer::DrawScanlineEx<0xfd>; 
+ m_ds[0xfe] = &GPURasterizer::DrawScanlineEx<0xfe>; + m_ds[0xff] = &GPURasterizer::DrawScanlineEx<0xff>; + m_ds[0x100] = &GPURasterizer::DrawScanlineEx<0x100>; + m_ds[0x101] = &GPURasterizer::DrawScanlineEx<0x101>; + m_ds[0x102] = &GPURasterizer::DrawScanlineEx<0x102>; + m_ds[0x103] = &GPURasterizer::DrawScanlineEx<0x103>; + m_ds[0x104] = &GPURasterizer::DrawScanlineEx<0x104>; + m_ds[0x105] = &GPURasterizer::DrawScanlineEx<0x105>; + m_ds[0x106] = &GPURasterizer::DrawScanlineEx<0x106>; + m_ds[0x107] = &GPURasterizer::DrawScanlineEx<0x107>; + m_ds[0x108] = &GPURasterizer::DrawScanlineEx<0x108>; + m_ds[0x109] = &GPURasterizer::DrawScanlineEx<0x109>; + m_ds[0x10a] = &GPURasterizer::DrawScanlineEx<0x10a>; + m_ds[0x10b] = &GPURasterizer::DrawScanlineEx<0x10b>; + m_ds[0x10c] = &GPURasterizer::DrawScanlineEx<0x10c>; + m_ds[0x10d] = &GPURasterizer::DrawScanlineEx<0x10d>; + m_ds[0x10e] = &GPURasterizer::DrawScanlineEx<0x10e>; + m_ds[0x10f] = &GPURasterizer::DrawScanlineEx<0x10f>; + m_ds[0x110] = &GPURasterizer::DrawScanlineEx<0x110>; + m_ds[0x111] = &GPURasterizer::DrawScanlineEx<0x111>; + m_ds[0x112] = &GPURasterizer::DrawScanlineEx<0x112>; + m_ds[0x113] = &GPURasterizer::DrawScanlineEx<0x113>; + m_ds[0x114] = &GPURasterizer::DrawScanlineEx<0x114>; + m_ds[0x115] = &GPURasterizer::DrawScanlineEx<0x115>; + m_ds[0x116] = &GPURasterizer::DrawScanlineEx<0x116>; + m_ds[0x117] = &GPURasterizer::DrawScanlineEx<0x117>; + m_ds[0x118] = &GPURasterizer::DrawScanlineEx<0x118>; + m_ds[0x119] = &GPURasterizer::DrawScanlineEx<0x119>; + m_ds[0x11a] = &GPURasterizer::DrawScanlineEx<0x11a>; + m_ds[0x11b] = &GPURasterizer::DrawScanlineEx<0x11b>; + m_ds[0x11c] = &GPURasterizer::DrawScanlineEx<0x11c>; + m_ds[0x11d] = &GPURasterizer::DrawScanlineEx<0x11d>; + m_ds[0x11e] = &GPURasterizer::DrawScanlineEx<0x11e>; + m_ds[0x11f] = &GPURasterizer::DrawScanlineEx<0x11f>; + m_ds[0x120] = &GPURasterizer::DrawScanlineEx<0x120>; + m_ds[0x121] = &GPURasterizer::DrawScanlineEx<0x121>; + m_ds[0x122] = 
&GPURasterizer::DrawScanlineEx<0x122>; + m_ds[0x123] = &GPURasterizer::DrawScanlineEx<0x123>; + m_ds[0x124] = &GPURasterizer::DrawScanlineEx<0x124>; + m_ds[0x125] = &GPURasterizer::DrawScanlineEx<0x125>; + m_ds[0x126] = &GPURasterizer::DrawScanlineEx<0x126>; + m_ds[0x127] = &GPURasterizer::DrawScanlineEx<0x127>; + m_ds[0x128] = &GPURasterizer::DrawScanlineEx<0x128>; + m_ds[0x129] = &GPURasterizer::DrawScanlineEx<0x129>; + m_ds[0x12a] = &GPURasterizer::DrawScanlineEx<0x12a>; + m_ds[0x12b] = &GPURasterizer::DrawScanlineEx<0x12b>; + m_ds[0x12c] = &GPURasterizer::DrawScanlineEx<0x12c>; + m_ds[0x12d] = &GPURasterizer::DrawScanlineEx<0x12d>; + m_ds[0x12e] = &GPURasterizer::DrawScanlineEx<0x12e>; + m_ds[0x12f] = &GPURasterizer::DrawScanlineEx<0x12f>; + m_ds[0x130] = &GPURasterizer::DrawScanlineEx<0x130>; + m_ds[0x131] = &GPURasterizer::DrawScanlineEx<0x131>; + m_ds[0x132] = &GPURasterizer::DrawScanlineEx<0x132>; + m_ds[0x133] = &GPURasterizer::DrawScanlineEx<0x133>; + m_ds[0x134] = &GPURasterizer::DrawScanlineEx<0x134>; + m_ds[0x135] = &GPURasterizer::DrawScanlineEx<0x135>; + m_ds[0x136] = &GPURasterizer::DrawScanlineEx<0x136>; + m_ds[0x137] = &GPURasterizer::DrawScanlineEx<0x137>; + m_ds[0x138] = &GPURasterizer::DrawScanlineEx<0x138>; + m_ds[0x139] = &GPURasterizer::DrawScanlineEx<0x139>; + m_ds[0x13a] = &GPURasterizer::DrawScanlineEx<0x13a>; + m_ds[0x13b] = &GPURasterizer::DrawScanlineEx<0x13b>; + m_ds[0x13c] = &GPURasterizer::DrawScanlineEx<0x13c>; + m_ds[0x13d] = &GPURasterizer::DrawScanlineEx<0x13d>; + m_ds[0x13e] = &GPURasterizer::DrawScanlineEx<0x13e>; + m_ds[0x13f] = &GPURasterizer::DrawScanlineEx<0x13f>; + m_ds[0x140] = &GPURasterizer::DrawScanlineEx<0x140>; + m_ds[0x141] = &GPURasterizer::DrawScanlineEx<0x141>; + m_ds[0x142] = &GPURasterizer::DrawScanlineEx<0x142>; + m_ds[0x143] = &GPURasterizer::DrawScanlineEx<0x143>; + m_ds[0x144] = &GPURasterizer::DrawScanlineEx<0x144>; + m_ds[0x145] = &GPURasterizer::DrawScanlineEx<0x145>; + m_ds[0x146] = 
&GPURasterizer::DrawScanlineEx<0x146>; + m_ds[0x147] = &GPURasterizer::DrawScanlineEx<0x147>; + m_ds[0x148] = &GPURasterizer::DrawScanlineEx<0x148>; + m_ds[0x149] = &GPURasterizer::DrawScanlineEx<0x149>; + m_ds[0x14a] = &GPURasterizer::DrawScanlineEx<0x14a>; + m_ds[0x14b] = &GPURasterizer::DrawScanlineEx<0x14b>; + m_ds[0x14c] = &GPURasterizer::DrawScanlineEx<0x14c>; + m_ds[0x14d] = &GPURasterizer::DrawScanlineEx<0x14d>; + m_ds[0x14e] = &GPURasterizer::DrawScanlineEx<0x14e>; + m_ds[0x14f] = &GPURasterizer::DrawScanlineEx<0x14f>; + m_ds[0x150] = &GPURasterizer::DrawScanlineEx<0x150>; + m_ds[0x151] = &GPURasterizer::DrawScanlineEx<0x151>; + m_ds[0x152] = &GPURasterizer::DrawScanlineEx<0x152>; + m_ds[0x153] = &GPURasterizer::DrawScanlineEx<0x153>; + m_ds[0x154] = &GPURasterizer::DrawScanlineEx<0x154>; + m_ds[0x155] = &GPURasterizer::DrawScanlineEx<0x155>; + m_ds[0x156] = &GPURasterizer::DrawScanlineEx<0x156>; + m_ds[0x157] = &GPURasterizer::DrawScanlineEx<0x157>; + m_ds[0x158] = &GPURasterizer::DrawScanlineEx<0x158>; + m_ds[0x159] = &GPURasterizer::DrawScanlineEx<0x159>; + m_ds[0x15a] = &GPURasterizer::DrawScanlineEx<0x15a>; + m_ds[0x15b] = &GPURasterizer::DrawScanlineEx<0x15b>; + m_ds[0x15c] = &GPURasterizer::DrawScanlineEx<0x15c>; + m_ds[0x15d] = &GPURasterizer::DrawScanlineEx<0x15d>; + m_ds[0x15e] = &GPURasterizer::DrawScanlineEx<0x15e>; + m_ds[0x15f] = &GPURasterizer::DrawScanlineEx<0x15f>; + m_ds[0x160] = &GPURasterizer::DrawScanlineEx<0x160>; + m_ds[0x161] = &GPURasterizer::DrawScanlineEx<0x161>; + m_ds[0x162] = &GPURasterizer::DrawScanlineEx<0x162>; + m_ds[0x163] = &GPURasterizer::DrawScanlineEx<0x163>; + m_ds[0x164] = &GPURasterizer::DrawScanlineEx<0x164>; + m_ds[0x165] = &GPURasterizer::DrawScanlineEx<0x165>; + m_ds[0x166] = &GPURasterizer::DrawScanlineEx<0x166>; + m_ds[0x167] = &GPURasterizer::DrawScanlineEx<0x167>; + m_ds[0x168] = &GPURasterizer::DrawScanlineEx<0x168>; + m_ds[0x169] = &GPURasterizer::DrawScanlineEx<0x169>; + m_ds[0x16a] = 
&GPURasterizer::DrawScanlineEx<0x16a>; + m_ds[0x16b] = &GPURasterizer::DrawScanlineEx<0x16b>; + m_ds[0x16c] = &GPURasterizer::DrawScanlineEx<0x16c>; + m_ds[0x16d] = &GPURasterizer::DrawScanlineEx<0x16d>; + m_ds[0x16e] = &GPURasterizer::DrawScanlineEx<0x16e>; + m_ds[0x16f] = &GPURasterizer::DrawScanlineEx<0x16f>; + m_ds[0x170] = &GPURasterizer::DrawScanlineEx<0x170>; + m_ds[0x171] = &GPURasterizer::DrawScanlineEx<0x171>; + m_ds[0x172] = &GPURasterizer::DrawScanlineEx<0x172>; + m_ds[0x173] = &GPURasterizer::DrawScanlineEx<0x173>; + m_ds[0x174] = &GPURasterizer::DrawScanlineEx<0x174>; + m_ds[0x175] = &GPURasterizer::DrawScanlineEx<0x175>; + m_ds[0x176] = &GPURasterizer::DrawScanlineEx<0x176>; + m_ds[0x177] = &GPURasterizer::DrawScanlineEx<0x177>; + m_ds[0x178] = &GPURasterizer::DrawScanlineEx<0x178>; + m_ds[0x179] = &GPURasterizer::DrawScanlineEx<0x179>; + m_ds[0x17a] = &GPURasterizer::DrawScanlineEx<0x17a>; + m_ds[0x17b] = &GPURasterizer::DrawScanlineEx<0x17b>; + m_ds[0x17c] = &GPURasterizer::DrawScanlineEx<0x17c>; + m_ds[0x17d] = &GPURasterizer::DrawScanlineEx<0x17d>; + m_ds[0x17e] = &GPURasterizer::DrawScanlineEx<0x17e>; + m_ds[0x17f] = &GPURasterizer::DrawScanlineEx<0x17f>; + m_ds[0x180] = &GPURasterizer::DrawScanlineEx<0x180>; + m_ds[0x181] = &GPURasterizer::DrawScanlineEx<0x181>; + m_ds[0x182] = &GPURasterizer::DrawScanlineEx<0x182>; + m_ds[0x183] = &GPURasterizer::DrawScanlineEx<0x183>; + m_ds[0x184] = &GPURasterizer::DrawScanlineEx<0x184>; + m_ds[0x185] = &GPURasterizer::DrawScanlineEx<0x185>; + m_ds[0x186] = &GPURasterizer::DrawScanlineEx<0x186>; + m_ds[0x187] = &GPURasterizer::DrawScanlineEx<0x187>; + m_ds[0x188] = &GPURasterizer::DrawScanlineEx<0x188>; + m_ds[0x189] = &GPURasterizer::DrawScanlineEx<0x189>; + m_ds[0x18a] = &GPURasterizer::DrawScanlineEx<0x18a>; + m_ds[0x18b] = &GPURasterizer::DrawScanlineEx<0x18b>; + m_ds[0x18c] = &GPURasterizer::DrawScanlineEx<0x18c>; + m_ds[0x18d] = &GPURasterizer::DrawScanlineEx<0x18d>; + m_ds[0x18e] = 
&GPURasterizer::DrawScanlineEx<0x18e>; + m_ds[0x18f] = &GPURasterizer::DrawScanlineEx<0x18f>; + m_ds[0x190] = &GPURasterizer::DrawScanlineEx<0x190>; + m_ds[0x191] = &GPURasterizer::DrawScanlineEx<0x191>; + m_ds[0x192] = &GPURasterizer::DrawScanlineEx<0x192>; + m_ds[0x193] = &GPURasterizer::DrawScanlineEx<0x193>; + m_ds[0x194] = &GPURasterizer::DrawScanlineEx<0x194>; + m_ds[0x195] = &GPURasterizer::DrawScanlineEx<0x195>; + m_ds[0x196] = &GPURasterizer::DrawScanlineEx<0x196>; + m_ds[0x197] = &GPURasterizer::DrawScanlineEx<0x197>; + m_ds[0x198] = &GPURasterizer::DrawScanlineEx<0x198>; + m_ds[0x199] = &GPURasterizer::DrawScanlineEx<0x199>; + m_ds[0x19a] = &GPURasterizer::DrawScanlineEx<0x19a>; + m_ds[0x19b] = &GPURasterizer::DrawScanlineEx<0x19b>; + m_ds[0x19c] = &GPURasterizer::DrawScanlineEx<0x19c>; + m_ds[0x19d] = &GPURasterizer::DrawScanlineEx<0x19d>; + m_ds[0x19e] = &GPURasterizer::DrawScanlineEx<0x19e>; + m_ds[0x19f] = &GPURasterizer::DrawScanlineEx<0x19f>; + m_ds[0x1a0] = &GPURasterizer::DrawScanlineEx<0x1a0>; + m_ds[0x1a1] = &GPURasterizer::DrawScanlineEx<0x1a1>; + m_ds[0x1a2] = &GPURasterizer::DrawScanlineEx<0x1a2>; + m_ds[0x1a3] = &GPURasterizer::DrawScanlineEx<0x1a3>; + m_ds[0x1a4] = &GPURasterizer::DrawScanlineEx<0x1a4>; + m_ds[0x1a5] = &GPURasterizer::DrawScanlineEx<0x1a5>; + m_ds[0x1a6] = &GPURasterizer::DrawScanlineEx<0x1a6>; + m_ds[0x1a7] = &GPURasterizer::DrawScanlineEx<0x1a7>; + m_ds[0x1a8] = &GPURasterizer::DrawScanlineEx<0x1a8>; + m_ds[0x1a9] = &GPURasterizer::DrawScanlineEx<0x1a9>; + m_ds[0x1aa] = &GPURasterizer::DrawScanlineEx<0x1aa>; + m_ds[0x1ab] = &GPURasterizer::DrawScanlineEx<0x1ab>; + m_ds[0x1ac] = &GPURasterizer::DrawScanlineEx<0x1ac>; + m_ds[0x1ad] = &GPURasterizer::DrawScanlineEx<0x1ad>; + m_ds[0x1ae] = &GPURasterizer::DrawScanlineEx<0x1ae>; + m_ds[0x1af] = &GPURasterizer::DrawScanlineEx<0x1af>; + m_ds[0x1b0] = &GPURasterizer::DrawScanlineEx<0x1b0>; + m_ds[0x1b1] = &GPURasterizer::DrawScanlineEx<0x1b1>; + m_ds[0x1b2] = 
&GPURasterizer::DrawScanlineEx<0x1b2>; + m_ds[0x1b3] = &GPURasterizer::DrawScanlineEx<0x1b3>; + m_ds[0x1b4] = &GPURasterizer::DrawScanlineEx<0x1b4>; + m_ds[0x1b5] = &GPURasterizer::DrawScanlineEx<0x1b5>; + m_ds[0x1b6] = &GPURasterizer::DrawScanlineEx<0x1b6>; + m_ds[0x1b7] = &GPURasterizer::DrawScanlineEx<0x1b7>; + m_ds[0x1b8] = &GPURasterizer::DrawScanlineEx<0x1b8>; + m_ds[0x1b9] = &GPURasterizer::DrawScanlineEx<0x1b9>; + m_ds[0x1ba] = &GPURasterizer::DrawScanlineEx<0x1ba>; + m_ds[0x1bb] = &GPURasterizer::DrawScanlineEx<0x1bb>; + m_ds[0x1bc] = &GPURasterizer::DrawScanlineEx<0x1bc>; + m_ds[0x1bd] = &GPURasterizer::DrawScanlineEx<0x1bd>; + m_ds[0x1be] = &GPURasterizer::DrawScanlineEx<0x1be>; + m_ds[0x1bf] = &GPURasterizer::DrawScanlineEx<0x1bf>; + m_ds[0x1c0] = &GPURasterizer::DrawScanlineEx<0x1c0>; + m_ds[0x1c1] = &GPURasterizer::DrawScanlineEx<0x1c1>; + m_ds[0x1c2] = &GPURasterizer::DrawScanlineEx<0x1c2>; + m_ds[0x1c3] = &GPURasterizer::DrawScanlineEx<0x1c3>; + m_ds[0x1c4] = &GPURasterizer::DrawScanlineEx<0x1c4>; + m_ds[0x1c5] = &GPURasterizer::DrawScanlineEx<0x1c5>; + m_ds[0x1c6] = &GPURasterizer::DrawScanlineEx<0x1c6>; + m_ds[0x1c7] = &GPURasterizer::DrawScanlineEx<0x1c7>; + m_ds[0x1c8] = &GPURasterizer::DrawScanlineEx<0x1c8>; + m_ds[0x1c9] = &GPURasterizer::DrawScanlineEx<0x1c9>; + m_ds[0x1ca] = &GPURasterizer::DrawScanlineEx<0x1ca>; + m_ds[0x1cb] = &GPURasterizer::DrawScanlineEx<0x1cb>; + m_ds[0x1cc] = &GPURasterizer::DrawScanlineEx<0x1cc>; + m_ds[0x1cd] = &GPURasterizer::DrawScanlineEx<0x1cd>; + m_ds[0x1ce] = &GPURasterizer::DrawScanlineEx<0x1ce>; + m_ds[0x1cf] = &GPURasterizer::DrawScanlineEx<0x1cf>; + m_ds[0x1d0] = &GPURasterizer::DrawScanlineEx<0x1d0>; + m_ds[0x1d1] = &GPURasterizer::DrawScanlineEx<0x1d1>; + m_ds[0x1d2] = &GPURasterizer::DrawScanlineEx<0x1d2>; + m_ds[0x1d3] = &GPURasterizer::DrawScanlineEx<0x1d3>; + m_ds[0x1d4] = &GPURasterizer::DrawScanlineEx<0x1d4>; + m_ds[0x1d5] = &GPURasterizer::DrawScanlineEx<0x1d5>; + m_ds[0x1d6] = 
&GPURasterizer::DrawScanlineEx<0x1d6>; + m_ds[0x1d7] = &GPURasterizer::DrawScanlineEx<0x1d7>; + m_ds[0x1d8] = &GPURasterizer::DrawScanlineEx<0x1d8>; + m_ds[0x1d9] = &GPURasterizer::DrawScanlineEx<0x1d9>; + m_ds[0x1da] = &GPURasterizer::DrawScanlineEx<0x1da>; + m_ds[0x1db] = &GPURasterizer::DrawScanlineEx<0x1db>; + m_ds[0x1dc] = &GPURasterizer::DrawScanlineEx<0x1dc>; + m_ds[0x1dd] = &GPURasterizer::DrawScanlineEx<0x1dd>; + m_ds[0x1de] = &GPURasterizer::DrawScanlineEx<0x1de>; + m_ds[0x1df] = &GPURasterizer::DrawScanlineEx<0x1df>; + m_ds[0x1e0] = &GPURasterizer::DrawScanlineEx<0x1e0>; + m_ds[0x1e1] = &GPURasterizer::DrawScanlineEx<0x1e1>; + m_ds[0x1e2] = &GPURasterizer::DrawScanlineEx<0x1e2>; + m_ds[0x1e3] = &GPURasterizer::DrawScanlineEx<0x1e3>; + m_ds[0x1e4] = &GPURasterizer::DrawScanlineEx<0x1e4>; + m_ds[0x1e5] = &GPURasterizer::DrawScanlineEx<0x1e5>; + m_ds[0x1e6] = &GPURasterizer::DrawScanlineEx<0x1e6>; + m_ds[0x1e7] = &GPURasterizer::DrawScanlineEx<0x1e7>; + m_ds[0x1e8] = &GPURasterizer::DrawScanlineEx<0x1e8>; + m_ds[0x1e9] = &GPURasterizer::DrawScanlineEx<0x1e9>; + m_ds[0x1ea] = &GPURasterizer::DrawScanlineEx<0x1ea>; + m_ds[0x1eb] = &GPURasterizer::DrawScanlineEx<0x1eb>; + m_ds[0x1ec] = &GPURasterizer::DrawScanlineEx<0x1ec>; + m_ds[0x1ed] = &GPURasterizer::DrawScanlineEx<0x1ed>; + m_ds[0x1ee] = &GPURasterizer::DrawScanlineEx<0x1ee>; + m_ds[0x1ef] = &GPURasterizer::DrawScanlineEx<0x1ef>; + m_ds[0x1f0] = &GPURasterizer::DrawScanlineEx<0x1f0>; + m_ds[0x1f1] = &GPURasterizer::DrawScanlineEx<0x1f1>; + m_ds[0x1f2] = &GPURasterizer::DrawScanlineEx<0x1f2>; + m_ds[0x1f3] = &GPURasterizer::DrawScanlineEx<0x1f3>; + m_ds[0x1f4] = &GPURasterizer::DrawScanlineEx<0x1f4>; + m_ds[0x1f5] = &GPURasterizer::DrawScanlineEx<0x1f5>; + m_ds[0x1f6] = &GPURasterizer::DrawScanlineEx<0x1f6>; + m_ds[0x1f7] = &GPURasterizer::DrawScanlineEx<0x1f7>; + m_ds[0x1f8] = &GPURasterizer::DrawScanlineEx<0x1f8>; + m_ds[0x1f9] = &GPURasterizer::DrawScanlineEx<0x1f9>; + m_ds[0x1fa] = 
&GPURasterizer::DrawScanlineEx<0x1fa>; + m_ds[0x1fb] = &GPURasterizer::DrawScanlineEx<0x1fb>; + m_ds[0x1fc] = &GPURasterizer::DrawScanlineEx<0x1fc>; + m_ds[0x1fd] = &GPURasterizer::DrawScanlineEx<0x1fd>; + m_ds[0x1fe] = &GPURasterizer::DrawScanlineEx<0x1fe>; + m_ds[0x1ff] = &GPURasterizer::DrawScanlineEx<0x1ff>; + */ +} + +GPURasterizer::~GPURasterizer() +{ +} + +int GPURasterizer::Draw(Vertex* vertices, int count, const void* texture) +{ + GPUDrawingEnvironment& env = m_state->m_env; + + // m_scissor + + m_scissor.x = env.DRAREATL.X; + m_scissor.y = env.DRAREATL.Y; + m_scissor.z = min(env.DRAREABR.X + 1, 1024); + m_scissor.w = min(env.DRAREABR.Y + 1, 512); + + if(m_scissor.x >= m_scissor.z || m_scissor.y >= m_scissor.w) + { + ASSERT(0); + return 0; + } + + // m_sel + + m_sel.dw = 0; + m_sel.iip = env.PRIM.IIP; + m_sel.me = env.STATUS.ME; + m_sel.abe = env.PRIM.ABE; + m_sel.abr = env.STATUS.ABR; + m_sel.tge = env.PRIM.TGE; + m_sel.tme = env.PRIM.TME; + m_sel.tlu = env.STATUS.TP < 2; + m_sel.twin = (env.TWIN.ai32 & 0xfffff) != 0; + m_sel.ltf = 0; // TODO + + m_dsf = m_ds[m_sel]; + + // m_slenv + + m_slenv.steps = 0; + + m_slenv.vm = m_state->m_mem.m_vm16; + + if(m_sel.tme) + { + m_slenv.tex = texture; + m_slenv.clut = m_state->GetCLUT(); + + if(m_sel.twin) + { + DWORD u, v; + + u = ~(env.TWIN.TWW << 3) & 0xff; + v = ~(env.TWIN.TWH << 3) & 0xff; + + m_slenv.u[0] = GSVector4i((u << 16) | u); + m_slenv.v[0] = GSVector4i((v << 16) | v); + + u = env.TWIN.TWX << 3; + v = env.TWIN.TWY << 3; + + m_slenv.u[1] = GSVector4i((u << 16) | u); + m_slenv.v[1] = GSVector4i((v << 16) | v); + } + } + + m_slenv.a = GSVector4i(env.PRIM.ABE ? 0xffffffff : 0); + m_slenv.md = GSVector4i(env.STATUS.MD ? 
0x80008000 : 0); + + // TODO + + switch(env.PRIM.TYPE) + { + case GPU_POLYGON: + ASSERT(!(count % 3)); + count = count / 3; + for(int i = 0; i < count; i++, vertices += 3) DrawTriangle(vertices); + break; + case GPU_LINE: + ASSERT(!(count & 1)); + count = count / 2; + for(int i = 0; i < count; i++, vertices += 2) DrawLine(vertices); + break; + case GPU_SPRITE: + ASSERT(!(count & 1)); + count = count / 2; + for(int i = 0; i < count; i++, vertices += 2) DrawSprite(vertices); + break; + default: + __assume(0); + } + + m_state->m_perfmon.Put(GSPerfMon::Fillrate, m_slenv.steps); // TODO: move this to the renderer, not thread safe here + + return count; +} + +void GPURasterizer::DrawPoint(Vertex* v) +{ + // TODO: round to closest for point, prestep for line + + GSVector4i p(v->p); + + if(m_scissor.x <= p.x && p.x < m_scissor.z && m_scissor.y <= p.y && p.y < m_scissor.w) + { + if((p.y % m_threads) == m_id) + { + (this->*m_dsf)(p.y, p.x, p.x + 1, *v); + } + } +} + +void GPURasterizer::DrawLine(Vertex* v) +{ + Vertex dv = v[1] - v[0]; + + GSVector4 dp = dv.p.abs(); + GSVector4i dpi(dp); + + if(dpi.x == 0 && dpi.y == 0) return; + + int i = dpi.x > dpi.y ? 
0 : 1; + + Vertex edge = v[0]; + Vertex dedge = dv / dp.v[i]; + + // TODO: prestep + clip with the scissor + + int steps = dpi.v[i]; + + while(steps-- > 0) + { + DrawPoint(&edge); + + edge += dedge; + } +} + +static const int s_abc[8][4] = +{ + {0, 1, 2, 0}, + {1, 0, 2, 0}, + {0, 0, 0, 0}, + {1, 2, 0, 0}, + {0, 2, 1, 0}, + {0, 0, 0, 0}, + {2, 0, 1, 0}, + {2, 1, 0, 0}, +}; + +void GPURasterizer::DrawTriangle(Vertex* vertices) +{ + Vertex v[3]; + + GSVector4 aabb = vertices[0].p.yyyy(vertices[1].p); + GSVector4 bccb = vertices[1].p.yyyy(vertices[2].p).xzzx(); + + int i = (aabb > bccb).mask() & 7; + + v[0] = vertices[s_abc[i][0]]; + v[1] = vertices[s_abc[i][1]]; + v[2] = vertices[s_abc[i][2]]; + + aabb = v[0].p.yyyy(v[1].p); + bccb = v[1].p.yyyy(v[2].p).xzzx(); + + i = (aabb == bccb).mask() & 7; + + switch(i) + { + case 0: // a < b < c + DrawTriangleTopBottom(v); + break; + case 1: // a == b < c + DrawTriangleBottom(v); + break; + case 4: // a < b == c + DrawTriangleTop(v); + break; + case 7: // a == b == c + break; + default: + __assume(0); + } +} + +void GPURasterizer::DrawTriangleTop(Vertex* v) +{ + Vertex longest = v[2] - v[1]; + + if((longest.p == GSVector4::zero()).mask() & 1) + { + return; + } + + Vertex dscan = longest * longest.p.xxxx().rcp(); + + SetupScanline(dscan); + + int i = (longest.p > GSVector4::zero()).mask() & 1; + + Vertex& l = v[0]; + GSVector4 r = v[0].p; + + Vertex vl = v[2 - i] - l; + GSVector4 vr = v[1 + i].p - r; + + Vertex dl = vl / vl.p.yyyy(); + GSVector4 dr = vr / vr.yyyy(); + + GSVector4i tb(l.p.xyxy(v[2].p).ceil()); + + int top = tb.y; + int bottom = tb.w; + + if(top < m_scissor.y) top = m_scissor.y; + if(bottom > m_scissor.w) bottom = m_scissor.w; + + if(top < bottom) + { + float py = (float)top - l.p.y; + + if(py > 0) + { + GSVector4 dy(py); + + l += dl * dy; + r += dr * dy; + } + + DrawTriangleSection(top, bottom, l, dl, r, dr, dscan); + } +} + +void GPURasterizer::DrawTriangleBottom(Vertex* v) +{ + Vertex longest = v[1] - v[0]; + + 
if((longest.p == GSVector4::zero()).mask() & 1) + { + return; + } + + Vertex dscan = longest * longest.p.xxxx().rcp(); + + SetupScanline(dscan); + + int i = (longest.p > GSVector4::zero()).mask() & 1; + + Vertex& l = v[1 - i]; + GSVector4& r = v[i].p; + + Vertex vl = v[2] - l; + GSVector4 vr = v[2].p - r; + + Vertex dl = vl / vl.p.yyyy(); + GSVector4 dr = vr / vr.yyyy(); + + GSVector4i tb(l.p.xyxy(v[2].p).ceil()); + + int top = tb.y; + int bottom = tb.w; + + if(top < m_scissor.y) top = m_scissor.y; + if(bottom > m_scissor.w) bottom = m_scissor.w; + + if(top < bottom) + { + float py = (float)top - l.p.y; + + if(py > 0) + { + GSVector4 dy(py); + + l += dl * dy; + r += dr * dy; + } + + DrawTriangleSection(top, bottom, l, dl, r, dr, dscan); + } +} + +void GPURasterizer::DrawTriangleTopBottom(Vertex* v) +{ + Vertex v01, v02, v12; + + v01 = v[1] - v[0]; + v02 = v[2] - v[0]; + + Vertex longest = v[0] + v02 * (v01.p / v02.p).yyyy() - v[1]; + + if((longest.p == GSVector4::zero()).mask() & 1) + { + return; + } + + Vertex dscan = longest * longest.p.xxxx().rcp(); + + SetupScanline(dscan); + + Vertex& l = v[0]; + GSVector4 r = v[0].p; + + Vertex dl; + GSVector4 dr; + + bool b = (longest.p > GSVector4::zero()).mask() & 1; + + if(b) + { + dl = v01 / v01.p.yyyy(); + dr = v02.p / v02.p.yyyy(); + } + else + { + dl = v02 / v02.p.yyyy(); + dr = v01.p / v01.p.yyyy(); + } + + GSVector4i tb(v[0].p.yyyy(v[1].p).xzyy(v[2].p).ceil()); + + int top = tb.x; + int bottom = tb.y; + + if(top < m_scissor.y) top = m_scissor.y; + if(bottom > m_scissor.w) bottom = m_scissor.w; + + float py = (float)top - l.p.y; + + if(py > 0) + { + GSVector4 dy(py); + + l += dl * dy; + r += dr * dy; + } + + if(top < bottom) + { + DrawTriangleSection(top, bottom, l, dl, r, dr, dscan); + } + + if(b) + { + v12 = v[2] - v[1]; + + l = v[1]; + + dl = v12 / v12.p.yyyy(); + } + else + { + v12.p = v[2].p - v[1].p; + + r = v[1].p; + + dr = v12.p / v12.p.yyyy(); + } + + top = tb.y; + bottom = tb.z; + + if(top < m_scissor.y) 
top = m_scissor.y; + if(bottom > m_scissor.w) bottom = m_scissor.w; + + if(top < bottom) + { + py = (float)top - l.p.y; + + if(py > 0) l += dl * py; + + py = (float)top - r.y; + + if(py > 0) r += dr * py; + + DrawTriangleSection(top, bottom, l, dl, r, dr, dscan); + } +} + +void GPURasterizer::DrawTriangleSection(int top, int bottom, Vertex& l, const Vertex& dl, GSVector4& r, const GSVector4& dr, const Vertex& dscan) +{ + ASSERT(top < bottom); + + while(1) + { + do + { + if((top % m_threads) == m_id) + { + GSVector4i lr(l.p.xyxy(r).ceil()); + + int& left = lr.x; + int& right = lr.z; + + if(left < m_scissor.x) left = m_scissor.x; + if(right > m_scissor.z) right = m_scissor.z; + + if(left < right) + { + Vertex scan = l; + + float px = (float)left - l.p.x; + + if(px > 0) scan += dscan * px; + + (this->*m_dsf)(top, left, right, scan); + } + } + } + while(0); + + if(++top >= bottom) break; + + l += dl; + r += dr; + } +} + +void GPURasterizer::DrawSprite(Vertex* v) +{ + GSVector4i r(v[0].p.xyxy(v[1].p).ceil()); + + int& top = r.y; + int& bottom = r.w; + + int& left = r.x; + int& right = r.z; + + #if _M_SSE >= 0x401 + + r = r.sat_i32(m_scissor); + + if((r < r.zwzw()).mask() != 0x00ff) return; + + #else + + if(top < m_scissor.y) top = m_scissor.y; + if(bottom > m_scissor.w) bottom = m_scissor.w; + if(top >= bottom) return; + + if(left < m_scissor.x) left = m_scissor.x; + if(right > m_scissor.z) right = m_scissor.z; + if(left >= right) return; + + #endif + + Vertex scan = v[0]; + + // TODO: solid rect + + if(m_sel.tme) + { + Vertex dedge, dscan; + + GSVector4 one = GSVector4(1.0f).xyxy(GSVector4::zero()); + + dscan.p = one.wwxw(); + dedge.p = one.wwwy(); + + if(scan.p.y < (float)top) scan.p += dedge.p * ((float)top - scan.p.y); + if(scan.p.x < (float)left) scan.p += dscan.p * ((float)left - scan.p.x); + + SetupScanline(dscan); + + for(; top < bottom; top++, scan.p += dedge.p) + { + if((top % m_threads) == m_id) + { + (this->*m_dsf)(top, left, right, scan); + } + } + } + else 
+ { + for(; top < bottom; top++) + { + if((top % m_threads) == m_id) + { + (this->*m_dsf)(top, left, right, scan); + } + } + } +} + +template +void GPURasterizer::SetupScanline(const Vertex& dv) +{ + if(pos) + { + GSVector4 dp = dv.p; + + m_slenv.dp = dp; + m_slenv.dp8 = dp * 8.0f; + } + + if(col) + { + GSVector4 dc = dv.c; + + m_slenv.dc = dc; + m_slenv.dc8 = dc * 8.0f; + } +} + +void GPURasterizer::DrawScanline(int top, int left, int right, const Vertex& v) +{ + GSVector4 ps0123 = GSVector4::ps0123(); + GSVector4 ps4567 = GSVector4::ps4567(); + + GSVector4 s[2], t[2]; + + GSVector4 vp = v.p; + + s[0] = vp.zzzz(); s[1] = s[0]; + t[0] = vp.wwww(); t[1] = t[0]; + + if(m_sel.tme) + { + GSVector4 dp = m_slenv.dp; + + s[0] += dp.zzzz() * ps0123; + t[0] += dp.wwww() * ps0123; + s[1] += dp.zzzz() * ps4567; + t[1] += dp.wwww() * ps4567; + } + + GSVector4 r[2], g[2], b[2]; + + GSVector4 vc = v.c; + + r[0] = vc.xxxx(); r[1] = r[0]; + g[0] = vc.yyyy(); g[1] = g[0]; + b[0] = vc.zzzz(); b[1] = b[0]; + + if(m_sel.iip) + { + GSVector4 dc = m_slenv.dc; + + r[0] += dc.xxxx() * ps0123; + g[0] += dc.yyyy() * ps0123; + b[0] += dc.zzzz() * ps0123; + r[1] += dc.xxxx() * ps4567; + g[1] += dc.yyyy() * ps4567; + b[1] += dc.zzzz() * ps4567; + } + + int steps = right - left; + + m_slenv.steps += steps; + + WORD* fb = &m_slenv.vm[(top << 10) + left]; + + while(1) + { + do + { + int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(8))); + + GSVector4i test = GSVector4i::zero(); + + GSVector4i d = GSVector4i::zero(); + + if(m_sel.rfb) // me | abe + { + d = GSVector4i::load(fb); + + if(m_sel.me) + { + test = d.sra16(15); + + if(test.alltrue()) + { + continue; + } + } + } + + GSVector4i c[4]; + + if(m_sel.tme) + { + SampleTexture(pixels, m_sel.ltf, m_sel.tlu, m_sel.twin, test, s, t, c); + } + else + { + c[3] = m_slenv.a; + } + + ColorTFX(m_sel.tfx, r, g, b, c); + + if(m_sel.abe) + { + AlphaBlend(m_sel.abr, d, c); + } + + WriteFrame(fb, test, c, pixels); + } + while(0); 
+ + if(steps <= 8) break; + + steps -= 8; + + fb += 8; + + if(m_sel.tme) + { + GSVector4 dp8 = m_slenv.dp8; + + s[0] += dp8.zzzz(); + t[0] += dp8.wwww(); + s[1] += dp8.zzzz(); + t[1] += dp8.wwww(); + } + + if(m_sel.iip) + { + GSVector4 dc8 = m_slenv.dc8; + + r[0] += dc8.xxxx(); + g[0] += dc8.yyyy(); + b[0] += dc8.zzzz(); + r[1] += dc8.xxxx(); + g[1] += dc8.yyyy(); + b[1] += dc8.zzzz(); + } + } +} + +template +void GPURasterizer::DrawScanlineEx(int top, int left, int right, const Vertex& v) +{ + DWORD iip = (sel >> 0) & 1; + DWORD me = (sel >> 1) & 1; + DWORD abe = (sel >> 2) & 1; + DWORD abr = (sel >> 3) & 3; + DWORD tge = (sel >> 5) & 1; + DWORD tme = (sel >> 6) & 1; + DWORD tlu = (sel >> 7) & 1; + DWORD twin = (sel >> 8) & 1; + DWORD rfb = (sel >> 1) & 3; + DWORD tfx = (sel >> 5) & 3; + + GSVector4 ps0123 = GSVector4::ps0123(); + GSVector4 ps4567 = GSVector4::ps4567(); + + GSVector4 s[2], t[2]; + + GSVector4 vp = v.p; + + s[0] = vp.zzzz(); s[1] = s[0]; + t[0] = vp.wwww(); t[1] = t[0]; + + if(tme) + { + GSVector4 dp = m_slenv.dp; + + s[0] += dp.zzzz() * ps0123; + t[0] += dp.wwww() * ps0123; + s[1] += dp.zzzz() * ps4567; + t[1] += dp.wwww() * ps4567; + } + + GSVector4 r[2], g[2], b[2]; + + GSVector4 vc = v.c; + + r[0] = vc.xxxx(); r[1] = r[0]; + g[0] = vc.yyyy(); g[1] = g[0]; + b[0] = vc.zzzz(); b[1] = b[0]; + + if(iip) + { + GSVector4 dc = m_slenv.dc; + + r[0] += dc.xxxx() * ps0123; + g[0] += dc.yyyy() * ps0123; + b[0] += dc.zzzz() * ps0123; + r[1] += dc.xxxx() * ps4567; + g[1] += dc.yyyy() * ps4567; + b[1] += dc.zzzz() * ps4567; + } + + int steps = right - left; + + m_slenv.steps += steps; + + WORD* fb = &m_slenv.vm[(top << 10) + left]; + + while(1) + { + do + { + int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(8))); + + GSVector4i test = GSVector4i::zero(); + + GSVector4i d = GSVector4i::zero(); + + if(rfb) // me | abe + { + d = GSVector4i::load(fb); + + if(me) + { + test = d.sra16(15); + + if(test.alltrue()) + { + continue; + } + 
} + } + + GSVector4i c[4]; + + if(tme) + { + SampleTexture(pixels, m_sel.ltf, tlu, twin, test, s, t, c); // TODO: ltf + } + else + { + c[3] = m_slenv.a; + } + + ColorTFX(tfx, r, g, b, c); + + if(abe) + { + AlphaBlend(abr, d, c); + } + + WriteFrame(fb, test, c, pixels); + } + while(0); + + if(steps <= 8) break; + + steps -= 8; + + fb += 8; + + if(tme) + { + GSVector4 dp8 = m_slenv.dp8; + + s[0] += dp8.zzzz(); + t[0] += dp8.wwww(); + s[1] += dp8.zzzz(); + t[1] += dp8.wwww(); + } + + if(iip) + { + GSVector4 dc8 = m_slenv.dc8; + + r[0] += dc8.xxxx(); + g[0] += dc8.yyyy(); + b[0] += dc8.zzzz(); + r[1] += dc8.xxxx(); + g[1] += dc8.yyyy(); + b[1] += dc8.zzzz(); + } + } +} + + +void GPURasterizer::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4* s, const GSVector4* t, GSVector4i* c) +{ + const void* RESTRICT tex = m_slenv.tex; + const WORD* RESTRICT clut = m_slenv.clut; + + if(ltf) + { + // TODO + } + else + { + GSVector4i u, v; + + u = GSVector4i(s[0]).ps32(GSVector4i(s[1])); + v = GSVector4i(t[0]).ps32(GSVector4i(t[1])); + + if(twin) + { + u = (u & m_slenv.u[0]).add16(m_slenv.u[1]); + v = (v & m_slenv.v[0]).add16(m_slenv.v[1]); + } + + GSVector4i uv = u.pu16(v); + + GSVector4i addr = uv.upl8(uv.zwxy()); + + GSVector4i c00; + + #if _M_SSE >= 0x401 + + if(tlu) + { + c00 = addr.gather16_16((const BYTE*)tex).gather16_16(clut); + } + else + { + c00 = addr.gather16_16((const WORD*)tex); + } + + #else + + int i = 0; + + if(tlu) + { + do + { + if(test.u16[i]) // me && + { + continue; + } + + c00.u16[i] = clut[((const BYTE*)tex)[addr.u16[i]]]; + } + while(++i < pixels); + } + else + { + do + { + if(test.u16[i]) // me && + { + continue; + } + + c00.u16[i] = ((const WORD*)tex)[addr.u16[i]]; + } + while(++i < pixels); + } + + #endif + + test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels + + c[0] = (c00 & 0x001f001f) << 3; + c[1] = (c00 & 0x03e003e0) >> 2; + c[2] = (c00 & 0x7c007c00) >> 7; + c[3] = c00.sra16(15); + } +} + 
+void GPURasterizer::ColorTFX(DWORD tfx, const GSVector4* r, const GSVector4* g, const GSVector4* b, GSVector4i* c) +{ + GSVector4i ri, gi, bi; + + switch(tfx) + { + case 0: // none (tfx = 0) + case 1: // none (tfx = tge) + ri = GSVector4i(r[0]).ps32(GSVector4i(r[1])); + gi = GSVector4i(g[0]).ps32(GSVector4i(g[1])); + bi = GSVector4i(b[0]).ps32(GSVector4i(b[1])); + c[0] = ri; + c[1] = gi; + c[2] = bi; + break; + case 2: // modulate (tfx = tme | tge) + ri = GSVector4i(r[0]).ps32(GSVector4i(r[1])); + gi = GSVector4i(g[0]).ps32(GSVector4i(g[1])); + bi = GSVector4i(b[0]).ps32(GSVector4i(b[1])); + c[0] = c[0].mul16l(ri).srl16(7); + c[1] = c[1].mul16l(gi).srl16(7); + c[2] = c[2].mul16l(bi).srl16(7); + c[0] = c[0].pu16().upl8(); + c[1] = c[1].pu16().upl8(); + c[2] = c[2].pu16().upl8(); + break; + case 3: // decal (tfx = tme) + break; + default: + __assume(0); + } +} +void GPURasterizer::AlphaBlend(UINT32 abr, const GSVector4i& d, GSVector4i* c) +{ + GSVector4i r = (d & 0x001f001f) << 3; + GSVector4i g = (d & 0x03e003e0) >> 2; + GSVector4i b = (d & 0x7c007c00) >> 7; + + switch(abr) + { + case 0: + r = r.add16(c[0]).srl16(1).min_i16(GSVector4i::x00ff()); + g = g.add16(c[1]).srl16(1).min_i16(GSVector4i::x00ff()); + b = b.add16(c[2]).srl16(1).min_i16(GSVector4i::x00ff()); + break; + case 1: + r = r.add16(c[0]).min_i16(GSVector4i::x00ff()); + g = g.add16(c[1]).min_i16(GSVector4i::x00ff()); + b = b.add16(c[2]).min_i16(GSVector4i::x00ff()); + break; + case 2: + r = r.sub16(c[0]).max_i16(GSVector4i::zero()); + g = g.sub16(c[1]).max_i16(GSVector4i::zero()); + b = b.sub16(c[2]).max_i16(GSVector4i::zero()); + break; + case 3: + r = r.add16(c[0].srl16(2)).min_i16(GSVector4i::x00ff()); + g = g.add16(c[1].srl16(2)).min_i16(GSVector4i::x00ff()); + b = b.add16(c[2].srl16(2)).min_i16(GSVector4i::x00ff()); + break; + default: + __assume(0); + } + + c[0] = c[0].blend8(r, c[3]); + c[1] = c[1].blend8(g, c[3]); + c[2] = c[2].blend8(b, c[3]); +} + +void GPURasterizer::WriteFrame(WORD* RESTRICT 
fb, const GSVector4i& test, const GSVector4i* c, int pixels) +{ + GSVector4i r = (c[0] & 0x00f800f8) >> 3; + GSVector4i g = (c[1] & 0x00f800f8) << 2; + GSVector4i b = (c[2] & 0x00f800f8) << 7; + + GSVector4i s = r | g | b | m_slenv.md; + + int i = 0; + + do + { + if(test.u16[i] == 0) + { + fb[i] = s.u16[i]; + } + } + while(++i < pixels); +} diff --git a/gsdx/GPURasterizer.h b/gsdx/GPURasterizer.h new file mode 100644 index 0000000..70d7dfc --- /dev/null +++ b/gsdx/GPURasterizer.h @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPUState.h" +#include "GPUVertexSW.h" +#include "GPUTextureCacheSW.h" +#include "GSAlignedClass.h" + +class GPURasterizer : public GSAlignedClass<16> +{ +protected: + typedef GPUVertexSW Vertex; + + GPUState* m_state; + int m_id; + int m_threads; + +private: + + union ScanlineSelector + { + struct + { + DWORD iip:1; // 0 + DWORD me:1; // 1 + DWORD abe:1; // 2 + DWORD abr:2; // 3 + DWORD tge:1; // 5 + DWORD tme:1; // 6 + DWORD tlu:1; // 7 + DWORD twin:1; // 8 + DWORD ltf:1; // 9 + // DWORD dtd:1; // 10 + // DWORD dte:1: // 11 + }; + + struct + { + DWORD _pad1:1; // 0 + DWORD rfb:2; // 1 + DWORD _pad2:2; // 3 + DWORD tfx:2; // 5 + }; + + DWORD dw; + + operator DWORD() {return dw & 0x1ff;} + }; + + __declspec(align(16)) struct ScanlineEnvironment + { + int steps; + + WORD* vm; + + const void* tex; + const WORD* clut; + + GSVector4i u[2]; + GSVector4i v[2]; + + GSVector4i a; + GSVector4i md; // similar to gs fba + + GSVector4 dp, dp8; + GSVector4 dc, dc8; + }; + + GSVector4i m_scissor; + ScanlineSelector m_sel; + ScanlineEnvironment m_slenv; + + template + __forceinline void SetupScanline(const Vertex& dv); + + typedef void (GPURasterizer::*DrawScanlinePtr)(int top, int left, int right, const Vertex& v); + + DrawScanlinePtr m_ds[512], m_dsf; + + void DrawScanline(int top, int left, int right, const Vertex& v); + + template + void DrawScanlineEx(int top, int left, int right, const Vertex& v); + + __forceinline void SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4* s, const GSVector4* t, GSVector4i* c); + __forceinline void ColorTFX(DWORD tfx, const GSVector4* r, const GSVector4* g, const GSVector4* b, GSVector4i* c); + __forceinline void AlphaBlend(UINT32 abr, const GSVector4i& d, GSVector4i* c); + __forceinline void WriteFrame(WORD* RESTRICT fb, const GSVector4i& test, const GSVector4i* c, int pixels); + + void DrawPoint(Vertex* v); + void 
DrawLine(Vertex* v); + void DrawTriangle(Vertex* v); + void DrawTriangleTop(Vertex* v); + void DrawTriangleBottom(Vertex* v); + void DrawTriangleTopBottom(Vertex* v); + void DrawSprite(Vertex* v); + + __forceinline void DrawTriangleSection(int top, int bottom, Vertex& l, const Vertex& dl, GSVector4& r, const GSVector4& dr, const Vertex& dscan); + +public: + GPURasterizer(GPUState* state, int id = 0, int threads = 0); + virtual ~GPURasterizer(); + + int Draw(Vertex* v, int count, const void* texture); +}; diff --git a/gsdx/GPURenderer.cpp b/gsdx/GPURenderer.cpp new file mode 100644 index 0000000..6540c31 --- /dev/null +++ b/gsdx/GPURenderer.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GPURenderer.h" diff --git a/gsdx/GPURenderer.h b/gsdx/GPURenderer.h new file mode 100644 index 0000000..9900abc --- /dev/null +++ b/gsdx/GPURenderer.h @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPUState.h" +#include "GSVertexList.h" + +struct GPURendererSettings +{ + bool m_vsync; +}; + +class GPURendererBase : public GPUState, protected GPURendererSettings +{ +protected: + HWND m_hWnd; + +public: + GPURendererBase(const GPURendererSettings& rs) + : m_hWnd(NULL) + { + m_vsync = rs.m_vsync; + } + + virtual bool Create(HWND hWnd) = 0; + virtual void VSync() = 0; + virtual bool MakeSnapshot(LPCTSTR path) = 0; +}; + +template +class GPURenderer : public GPURendererBase +{ +protected: + typedef typename Device::Texture Texture; + + Vertex* m_vertices; + int m_count; + int m_maxcount; + GSVertexList m_vl; + + void Reset() + { + m_count = 0; + m_vl.RemoveAll(); + + __super::Reset(); + } + + void VertexKick() + { + if(m_vl.GetCount() < m_env.PRIM.VTX) + { + return; + } + + if(m_count > m_maxcount) + { + m_maxcount = max(10000, m_maxcount * 3/2); + m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 16); + m_maxcount -= 100; + } + + Vertex* v = &m_vertices[m_count]; + + int count = 0; + + switch(m_env.PRIM.TYPE) + { + case GPU_POLYGON: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.GetAt(2, v[2]); + m_vl.RemoveAll(); + count = 3; + break; + case GPU_LINE: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.RemoveAll(); + count = 2; + break; + case GPU_SPRITE: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.RemoveAll(); + count = 2; + break; + default: + ASSERT(0); + m_vl.RemoveAll(); + count 
= 0; + break; + } + + (this->*m_fpDrawingKickHandlers[m_env.PRIM.TYPE])(v, count); + + m_count += count; + } + + typedef void (GPURenderer::*DrawingKickHandler)(Vertex* v, int& count); + + DrawingKickHandler m_fpDrawingKickHandlers[4]; + + void DrawingKickNull(Vertex* v, int& count) + { + ASSERT(0); + } + + void ResetPrim() + { + m_vl.RemoveAll(); + } + + void FlushPrim() + { + if(m_count > 0) + { + Dump(_T("db")); + + Draw(); + + m_count = 0; + + Dump(_T("dc"), false); + + if(m_env.PRIM.TME) + { + CRect r; + + r.left = m_env.STATUS.TX << 6; + r.top = m_env.STATUS.TY << 8; + r.right = r.left + 256; + r.bottom = r.top + 256; + + CString str; + str.Format(_T("da_%d_%d_%d_%d_%d"), m_env.STATUS.TP, r); + Dump(str, m_env.STATUS.TP, r, false); + } + + } + } + + virtual void ResetDevice() {} + virtual void Draw() = 0; + virtual bool GetOutput(Texture& t) = 0; + + bool Merge() + { + Texture st[2]; + + if(!GetOutput(st[0])) + { + return false; + } + + CSize s; + + s.cx = st[0].GetWidth(); + s.cy = st[0].GetHeight(); + + GSVector4 sr[2]; + + sr[0].x = 0; + sr[0].y = 0; + sr[0].z = 1.0f; + sr[0].w = 1.0f; + + GSVector4 dr[2]; + + dr[0].x = 0; + dr[0].y = 0; + dr[0].z = (float)s.cx; + dr[0].w = (float)s.cy; + + GSVector4 c(0, 0, 0, 1); + + m_dev.Merge(st, sr, dr, s, 1, 1, c); + + return true; + } + +public: + Device m_dev; + +public: + GPURenderer(const GPURendererSettings& rs) + : GPURendererBase(rs) + , m_count(0) + , m_maxcount(10000) + { + m_vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 16); + m_maxcount -= 100; + + for(int i = 0; i < countof(m_fpDrawingKickHandlers); i++) + { + m_fpDrawingKickHandlers[i] = &GPURenderer::DrawingKickNull; + } + } + + virtual ~GPURenderer() + { + if(m_vertices) _aligned_free(m_vertices); + } + + virtual bool Create(HWND hWnd) + { + if(!m_dev.Create(hWnd, m_vsync)) + { + return false; + } + + m_hWnd = hWnd; // TODO + + DWORD style = GetWindowLong(hWnd, GWL_STYLE); + style |= WS_OVERLAPPEDWINDOW; + SetWindowLong(hWnd, 
GWL_STYLE, style); + UpdateWindow(hWnd); + + ShowWindow(hWnd, SW_SHOWNORMAL); + + // TODO + + Reset(); + + return true; + } + + virtual void VSync() + { + GSPerfMonAutoTimer pmat(m_perfmon); + + // m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ? + + if(!IsWindow(m_hWnd)) + { + return; + } + + Flush(); + + m_perfmon.Put(GSPerfMon::Frame); + + if(!Merge()) + { + return; + } + + // osd + + static UINT64 s_frame = 0; + static CString s_stats; + + if(m_perfmon.GetFrame() - s_frame >= 30) + { + m_perfmon.Update(); + + s_frame = m_perfmon.GetFrame(); + + double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame); + + CRect r = m_env.GetDisplayRect(); + + s_stats.Format( + _T("%I64d | %d x %d | %.2f fps (%d%%) | %d/%d | %d%% CPU | %.2f | %.2f"), + m_perfmon.GetFrame(), r.Width(), r.Height(), fps, (int)(100.0 * fps / m_env.GetFPS()), + (int)m_perfmon.Get(GSPerfMon::Prim), + (int)m_perfmon.Get(GSPerfMon::Draw), + m_perfmon.CPU(), + m_perfmon.Get(GSPerfMon::Swizzle) / 1024, + m_perfmon.Get(GSPerfMon::Unswizzle) / 1024 + ); + + double fillrate = m_perfmon.Get(GSPerfMon::Fillrate); + + if(fillrate > 0) + { + s_stats.Format(_T("%s | %.2f mpps"), CString(s_stats), fps * fillrate / (1024 * 1024)); + } + + SetWindowText(m_hWnd, s_stats); + } + + if(m_dev.IsLost()) + { + ResetDevice(); + } + + CRect r; + + GetClientRect(m_hWnd, &r); + + m_dev.Present(r); + } + + virtual bool MakeSnapshot(LPCTSTR path) + { + CString fn; + + fn.Format(_T("%s_%s"), path, CTime::GetCurrentTime().Format(_T("%Y%m%d%H%M%S"))); + + return m_dev.SaveCurrent(fn + _T(".bmp")); + } +}; diff --git a/gsdx/GPURendererSW.cpp b/gsdx/GPURendererSW.cpp new file mode 100644 index 0000000..706f9d7 --- /dev/null +++ b/gsdx/GPURendererSW.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your 
option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GPURendererSW.h" diff --git a/gsdx/GPURendererSW.h b/gsdx/GPURendererSW.h new file mode 100644 index 0000000..d9c48c6 --- /dev/null +++ b/gsdx/GPURendererSW.h @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPURenderer.h" +#include "GPUVertexSW.h" +#include "GPURasterizer.h" +#include "GPUTextureCacheSW.h" + +template +class GPURendererSW : public GPURenderer +{ + typedef GPUVertexSW Vertex; + +protected: + long m_threads; + GPUTextureCacheSW* m_tc; + GPURasterizer* m_rst; + Texture m_texture; + + void Reset() + { + m_tc->Invalidate(CRect(0, 0, 1024, 512)); + + __super::Reset(); + } + + void ResetDevice() + { + m_texture = Texture(); + } + + bool GetOutput(Texture& t) + { + CRect r = m_env.GetDisplayRect(); + + if(m_texture.GetWidth() != r.Width() || m_texture.GetHeight() != r.Height()) + { + m_texture = Texture(); + } + + if(!m_texture && !m_dev.CreateTexture(m_texture, r.Width(), r.Height())) + { + return false; + } + + // TODO + static DWORD* buff = (DWORD*)_aligned_malloc(1024 * 512 * 4, 16); + static int pitch = 1024 * 4; + + if(m_env.STATUS.ISRGB24) + { + for(int i = r.top; i < r.bottom; i++) + { + m_mem.Expand24(&m_mem.m_vm16[(i << 10) + r.left], &buff[i << 10], r.Width()); + } + } + else + { + for(int i = r.top; i < r.bottom; i++) + { + m_mem.Expand16(&m_mem.m_vm16[(i << 10) + r.left], &buff[i << 10], r.Width()); + } + } + + r.OffsetRect(-r.TopLeft()); + + m_texture.Update(r, buff, pitch); + + t = m_texture; + + return true; + } + + void VertexKick() + { + Vertex& v = m_vl.AddTail(); + + // x/y + off.x/y should wrap around at +/-1024 + + int x = m_v.XY.X + m_env.DROFF.X; + int y = m_v.XY.Y + m_env.DROFF.Y; + + v.p = GSVector4(x, y, m_v.UV.X, m_v.UV.Y) + GSVector4(0.0f, 0.0f, 0.5f, 0.5f); + v.c = GSVector4((DWORD)m_v.RGB.ai32); + + __super::VertexKick(); + } + + void DrawingKickTriangle(Vertex* v, int& count) + { + // TODO + } + + void DrawingKickLine(Vertex* v, int& count) + { + // TODO + } + + void DrawingKickSprite(Vertex* v, int& count) + { + // TODO + } + + void Draw() + { + const void* texture = NULL; + + if(m_env.PRIM.TME) + { + texture = m_tc->Lookup(m_env.STATUS); + + 
if(!texture) {ASSERT(0); return;} + } + + int prims = m_rst->Draw(m_vertices, m_count, texture); + + // TODO + { + CRect r; + + r.left = m_env.DRAREATL.X; + r.top = m_env.DRAREATL.Y; + r.right = min(m_env.DRAREABR.X + 1, 1024); + r.bottom = min(m_env.DRAREABR.Y + 1, 512); + + GSVector4 minv(+1e10f); + GSVector4 maxv(-1e10f); + + for(int i = 0, j = m_count; i < j; i++) + { + GSVector4 p = m_vertices[i].p; + + minv = minv.minv(p); + maxv = maxv.maxv(p); + } + + GSVector4i v(minv.xyxy(maxv)); + + r.left = max(r.left, min(r.right, v.x)); + r.top = max(r.top, min(r.bottom, v.y)); + r.right = min(r.right, max(r.left, v.z)); + r.bottom = min(r.bottom, max(r.top, v.w)); + + Invalidate(r); + } + + m_perfmon.Put(GSPerfMon::Prim, prims); + m_perfmon.Put(GSPerfMon::Draw, 1); + } + + void Invalidate(const CRect& r) + { + __super::Invalidate(r); + + m_tc->Invalidate(r); + } + +public: + GPURendererSW(const GPURendererSettings& rs) + : GPURenderer(rs) + { + m_threads = 1; + + m_tc = new GPUTextureCacheSW(this); + + m_rst = new GPURasterizer(this, 0, m_threads); + + m_fpDrawingKickHandlers[GPU_POLYGON] = (DrawingKickHandler)&GPURendererSW::DrawingKickTriangle; + m_fpDrawingKickHandlers[GPU_LINE] = (DrawingKickHandler)&GPURendererSW::DrawingKickLine; + m_fpDrawingKickHandlers[GPU_SPRITE] = (DrawingKickHandler)&GPURendererSW::DrawingKickSprite; + } + + virtual ~GPURendererSW() + { + delete m_tc; + + delete m_rst; + } +}; diff --git a/gsdx/GPUState.cpp b/gsdx/GPUState.cpp new file mode 100644 index 0000000..26e752b --- /dev/null +++ b/gsdx/GPUState.cpp @@ -0,0 +1,822 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GPUState.h" + +GPUState::GPUState() + : s_n(0) +{ + memset(m_status, 0, sizeof(m_status)); + + for(int i = 0; i < countof(m_fpGPUStatusCommandHandlers); i++) + { + m_fpGPUStatusCommandHandlers[i] = &GPUState::SCH_Null; + } + + m_fpGPUStatusCommandHandlers[0x00] = &GPUState::SCH_ResetGPU; + m_fpGPUStatusCommandHandlers[0x01] = &GPUState::SCH_ResetCommandBuffer; + m_fpGPUStatusCommandHandlers[0x02] = &GPUState::SCH_ResetIRQ; + m_fpGPUStatusCommandHandlers[0x03] = &GPUState::SCH_DisplayEnable; + m_fpGPUStatusCommandHandlers[0x04] = &GPUState::SCH_DMASetup; + m_fpGPUStatusCommandHandlers[0x05] = &GPUState::SCH_StartOfDisplayArea; + m_fpGPUStatusCommandHandlers[0x06] = &GPUState::SCH_HorizontalDisplayRange; + m_fpGPUStatusCommandHandlers[0x07] = &GPUState::SCH_VerticalDisplayRange; + m_fpGPUStatusCommandHandlers[0x08] = &GPUState::SCH_DisplayMode; + m_fpGPUStatusCommandHandlers[0x10] = &GPUState::SCH_GPUInfo; + + m_fpGPUPacketHandler[0] = &GPUState::PH_Command; + m_fpGPUPacketHandler[1] = &GPUState::PH_Polygon; + m_fpGPUPacketHandler[2] = &GPUState::PH_Line; + m_fpGPUPacketHandler[3] = &GPUState::PH_Sprite; + m_fpGPUPacketHandler[4] = &GPUState::PH_Move; + m_fpGPUPacketHandler[5] = &GPUState::PH_Write; + m_fpGPUPacketHandler[6] = &GPUState::PH_Read; + m_fpGPUPacketHandler[7] = &GPUState::PH_Environment; + + Reset(); +} + +GPUState::~GPUState() +{ +} + +void GPUState::Reset() +{ + m_env.Reset(); + + memset(&m_v, 0, sizeof(m_v)); +} 
+ +void GPUState::Flush() +{ + FlushPrim(); +} + +void GPUState::SetPrim(GPUReg* r) +{ + if(m_env.PRIM.TYPE != r->PRIM.TYPE) + { + ResetPrim(); + } + + GPURegPRIM PRIM = r->PRIM; + + PRIM.VTX = 0; + + switch(r->PRIM.TYPE) + { + case GPU_POLYGON: + PRIM.ai32 = (r->PRIM.ai32 & 0xF7000000) | 3; // TYPE IIP TME ABE TGE + break; + case GPU_LINE: + PRIM.ai32 = (r->PRIM.ai32 & 0xF2000000) | 2; // TYPE IIP ABE + PRIM.TGE = 1; + break; + case GPU_SPRITE: + PRIM.ai32 = (r->PRIM.ai32 & 0xE6000000) | 2; // TYPE TME ABE + PRIM.TGE = 1; + break; + } + + if(m_env.PRIM.ai32 != PRIM.ai32) + { + Flush(); + + m_env.PRIM = PRIM; + } +} + +void GPUState::SetCLUT(GPUReg* r) +{ + UINT32 mask = 0xFFFF0000; // X Y + + UINT32 value = (m_env.CLUT.ai32 & ~mask) | (r->ai32 & mask); + + if(m_env.CLUT.ai32 != value) + { + Flush(); + + m_env.CLUT.ai32 = value; + } +} + +void GPUState::SetTPAGE(GPUReg* r) +{ + UINT32 mask = 0x000001FF; // TP ABR TY TX + + UINT32 value = (m_env.STATUS.ai32 & ~mask) | ((r->ai32 >> 16) & mask); + + if(m_env.STATUS.ai32 != value) + { + Flush(); + + m_env.STATUS.ai32 = value; + } +} + +void GPUState::WriteData(const BYTE* mem, UINT32 size) +{ + GSPerfMonAutoTimer pmat(m_perfmon); + + size <<= 2; + + m_write.Append(mem, size); + + int i = 0; + + while(i < m_write.bytes) + { + GPUReg* r = (GPUReg*)&m_write.buff[i]; + + int ret = (this->*m_fpGPUPacketHandler[r->PACKET.TYPE])(r, (m_write.bytes - i) >> 2); + + if(ret == 0) return; // need more data + + i += ret << 2; + } + + m_write.Remove(i); +} + +void GPUState::ReadData(BYTE* mem, UINT32 size) +{ + GSPerfMonAutoTimer pmat(m_perfmon); + + int remaining = m_read.bytes - m_read.cur; + + int bytes = (int)size << 2; + + if(bytes > remaining) + { + // ASSERT(0); + + TRACE(_T("WARNING: ReadData\n")); + + // memset(&mem[remaining], 0, bytes - remaining); + + bytes = remaining; + } + + memcpy(mem, &m_read.buff[m_read.cur], bytes); + + m_read.cur += bytes; + + if(m_read.cur >= m_read.bytes) + { + m_env.STATUS.IMG = 0; + } +} + 
+void GPUState::WriteStatus(UINT32 status) +{ + GSPerfMonAutoTimer pmat(m_perfmon); + + UINT32 b = status >> 24; + + m_status[b] = status; + + (this->*m_fpGPUStatusCommandHandlers[b])((GPUReg*)&status); +} + +UINT32 GPUState::ReadStatus() +{ + GSPerfMonAutoTimer pmat(m_perfmon); + + m_env.STATUS.LCF = ~m_env.STATUS.LCF; // ? + + return m_env.STATUS.ai32; +} + +void GPUState::Freeze(GPUFreezeData* data) +{ + data->status = m_env.STATUS.ai32; + memcpy(data->control, m_status, 256 * 4); + memcpy(data->vram, m_mem.m_vm16, 1024 * 512 * 2); +} + +void GPUState::Defrost(const GPUFreezeData* data) +{ + m_env.STATUS.ai32 = data->status; + memcpy(m_status, data->control, 256 * 4); + memcpy(m_mem.m_vm16, data->vram, 1024 * 512 * 2); + + for(int i = 0; i <= 8; i++) + { + WriteStatus(m_status[i]); + } +} + +void GPUState::SCH_Null(GPUReg* r) +{ + ASSERT(0); +} + +void GPUState::SCH_ResetGPU(GPUReg* r) +{ + Reset(); +} + +void GPUState::SCH_ResetCommandBuffer(GPUReg* r) +{ + // ? +} + +void GPUState::SCH_ResetIRQ(GPUReg* r) +{ + // ? 
+} + +void GPUState::SCH_DisplayEnable(GPUReg* r) +{ + m_env.STATUS.DEN = r->DEN.DEN; +} + +void GPUState::SCH_DMASetup(GPUReg* r) +{ + m_env.STATUS.DMA = r->DMA.DMA; +} + +void GPUState::SCH_StartOfDisplayArea(GPUReg* r) +{ + m_env.DAREA = r->DAREA; +} + +void GPUState::SCH_HorizontalDisplayRange(GPUReg* r) +{ + m_env.DHRANGE = r->DHRANGE; +} + +void GPUState::SCH_VerticalDisplayRange(GPUReg* r) +{ + m_env.DVRANGE = r->DVRANGE; +} + +void GPUState::SCH_DisplayMode(GPUReg* r) +{ + m_env.STATUS.WIDTH0 = r->DMODE.WIDTH0; + m_env.STATUS.HEIGHT = r->DMODE.HEIGHT; + m_env.STATUS.ISPAL = r->DMODE.ISPAL; + m_env.STATUS.ISRGB24 = r->DMODE.ISRGB24; + m_env.STATUS.ISINTER = r->DMODE.ISINTER; + m_env.STATUS.WIDTH1 = r->DMODE.WIDTH1; +} + +void GPUState::SCH_GPUInfo(GPUReg* r) +{ + UINT32 value = 0; + + switch(r->GPUINFO.PARAM) + { + case 0x2: + value = m_env.TWIN.ai32; + break; + case 0x0: + case 0x1: + case 0x3: + value = m_env.DRAREATL.ai32; + break; + case 0x4: + value = m_env.DRAREABR.ai32; + break; + case 0x5: + case 0x6: + value = m_env.DROFF.ai32; + break; + case 0x7: + value = 2; + break; + case 0x8: + case 0xf: + value = 0xBFC03720; // ? + break; + default: + ASSERT(0); + break; + } + + m_read.RemoveAll(); + m_read.Append((BYTE*)&value, 4); + m_read.cur = 0; +} + +int GPUState::PH_Command(GPUReg* r, int size) +{ + switch(r->PACKET.OPTION) + { + case 0: // ??? + + return 1; + + case 1: // clear cache + + return 1; + + case 2: // fillrect + + if(size < 3) return 0; + + Flush(); + + CRect r2; + + r2.left = r[1].XY.X; + r2.top = r[1].XY.Y; + r2.right = r2.left + r[2].XY.X; + r2.bottom = r2.top + r[2].XY.Y; + + WORD c = (WORD)(((r[0].RGB.R >> 3) << 10) | ((r[0].RGB.R >> 3) << 5) | (r[0].RGB.R >> 3)); + + m_mem.FillRect(r2, c); + + Invalidate(r2); + + Dump(_T("f")); + + return 3; + } + + ASSERT(0); + + return 1; +} + +int GPUState::PH_Polygon(GPUReg* r, int size) +{ + int required = 1; + + int vertices = r[0].POLYGON.VTX ? 
4 : 3; + + required += vertices; + + if(r[0].POLYGON.TME) required += vertices; + + if(r[0].POLYGON.IIP) required += vertices - 1; + + if(size < required) return 0; + + // + + SetPrim(r); + + if(r[0].POLYGON.TME) + { + SetCLUT(&r[2]); + + SetTPAGE(&r[r[0].POLYGON.IIP ? 5 : 4]); + } + + // + + GPUVertex v[4]; + + for(int i = 0, j = 0; j < vertices; j++) + { + v[j].RGB = r[r[0].POLYGON.IIP ? i : 0].RGB; + + if(j == 0 || r[0].POLYGON.IIP) i++; + + v[j].XY = r[i++].XY; + + if(r[0].POLYGON.TME) + { + v[j].UV.X = r[i].UV.U; + v[j].UV.Y = r[i].UV.V; + + i++; + } + } + + for(int i = 0; i <= vertices - 3; i++) + { + for(int j = 0; j < 3; j++) + { + m_v = v[i + j]; + + VertexKick(); + } + } + + // + + return required; +} + +int GPUState::PH_Line(GPUReg* r, int size) +{ + int required = 1; + + int vertices = 0; + + if(r->LINE.PLL) + { + required++; + + for(int i = 1; i < size; i++) + { + if(r[i].ai32 == 0x55555555) + { + vertices = i - 1; + } + } + + if(vertices < 2) + { + return 0; + } + } + else + { + vertices = 2; + } + + required += vertices; + + if(r->LINE.IIP) required += vertices - 1; + + // + + SetPrim(r); + + // + + for(int i = 0, j = 0; j < vertices; j++) + { + if(j >= 2) VertexKick(); + + m_v.RGB = r[r[0].LINE.IIP ? 
i : 0].RGB; + + if(j == 0 || r[0].LINE.IIP) i++; + + m_v.XY = r[i++].XY; + + VertexKick(); + } + + // + + return required; +} + +int GPUState::PH_Sprite(GPUReg* r, int size) +{ + int required = 2; + + if(r[0].SPRITE.TME) required++; + if(r[0].SPRITE.SIZE == 0) required++; + + if(size < required) return 0; + + // + + SetPrim(r); + + if(r[0].SPRITE.TME) + { + SetCLUT(&r[2]); + } + + // + + int i = 0; + + m_v.RGB = r[i++].RGB; + + m_v.XY = r[i++].XY; + + if(r[0].SPRITE.TME) + { + m_v.UV.X = r[i].UV.U; + m_v.UV.Y = r[i].UV.V; + + i++; + } + + VertexKick(); + + int w = 0; + int h = 0; + + switch(r[0].SPRITE.SIZE) + { + case 0: w = r[i].XY.X; h = r[i].XY.Y; i++; break; + case 1: w = h = 1; break; + case 2: w = h = 8; break; + case 3: w = h = 16; break; + default: __assume(0); + } + + m_v.XY.X += w; + m_v.XY.Y += h; + + if(r[0].SPRITE.TME) + { + m_v.UV.X += w; + m_v.UV.Y += h; + } + + VertexKick(); + + // + + return required; +} + +int GPUState::PH_Move(GPUReg* r, int size) +{ + if(size < 4) return 0; + + Flush(); + + CPoint src, dst; + + src.x = r[1].XY.X; + src.y = r[1].XY.Y; + + dst.x = r[2].XY.X; + dst.y = r[2].XY.Y; + + int w = r[3].XY.X; + int h = r[3].XY.Y; + + m_mem.MoveRect(src, dst, w, h); + + Invalidate(CRect(dst, CSize(w, h))); + + Dump(_T("m")); + + return 4; +} + +int GPUState::PH_Write(GPUReg* r, int size) +{ + if(size < 3) return 0; + + int w = r[2].XY.X; + int h = r[2].XY.Y; + + int required = 3 + ((w * h + 1) >> 1); + + if(size < required) return 0; + + Flush(); + + CRect r2; + + r2.left = r[1].XY.X; + r2.top = r[1].XY.Y; + r2.right = r2.left + w; + r2.bottom = r2.top + h; + + m_mem.WriteRect(r2, (const WORD*)&r[3]); + + Invalidate(r2); + + Dump(_T("w")); + + m_perfmon.Put(GSPerfMon::Swizzle, w * h * 2); + + return required; +} + +int GPUState::PH_Read(GPUReg* r, int size) +{ + if(size < 3) return 0; + + Flush(); + + int w = r[2].XY.X; + int h = r[2].XY.Y; + + CRect r2; + + r2.left = r[1].XY.X; + r2.top = r[1].XY.Y; + r2.right = r2.left + w; + r2.bottom 
= r2.top + h; + + m_read.bytes = ((w * h + 1) & ~1) * 2; + m_read.cur = 0; + m_read.Reserve(m_read.bytes); + m_mem.ReadRect(r2, (WORD*)m_read.buff); + + Invalidate(r2); + + m_env.STATUS.IMG = 1; + + return 3; +} + +int GPUState::PH_Environment(GPUReg* r, int size) +{ + Flush(); // TODO: only call when something really changes + + switch(r->PACKET.OPTION) + { + case 1: // draw mode setting + + m_env.STATUS.TX = r->MODE.TX; + m_env.STATUS.TY = r->MODE.TY; + m_env.STATUS.ABR = r->MODE.ABR; + m_env.STATUS.TP = r->MODE.TP; + m_env.STATUS.DTD = r->MODE.DTD; + m_env.STATUS.DFE = r->MODE.DFE; + // ? + // m_env.STATUS.MD = r->MODE.MD; + // m_env.STATUS.ME = r->MODE.ME; + + // mirror bits? + + return 1; + + case 2: // texture window setting + + m_env.TWIN = r->TWIN; + + return 1; + + case 3: // set drawing area top left + + m_env.DRAREATL = r->DRAREA; + + return 1; + + case 4: // set drawing area bottom right + + m_env.DRAREABR = r->DRAREA; + + return 1; + + case 5: // drawing offset + + m_env.DROFF = r->DROFF; + + return 1; + + case 6: // mask setting + + m_env.STATUS.MD = r->MASK.MD; + m_env.STATUS.ME = r->MASK.ME; + + return 1; + } + + ASSERT(0); + + return 1; +} + +void GPUState::SaveBMP(LPCTSTR path, UINT32 TP, CRect r) +{ + r.left &= ~1; + r.right &= ~1; + + if(FILE* fp = _tfopen(path, _T("wb"))) + { + BITMAPINFOHEADER bih; + memset(&bih, 0, sizeof(bih)); + bih.biSize = sizeof(bih); + bih.biWidth = r.Width(); + bih.biHeight = r.Height(); + bih.biPlanes = 1; + bih.biBitCount = 32; + bih.biCompression = BI_RGB; + bih.biSizeImage = bih.biWidth * bih.biHeight * 4; + + BITMAPFILEHEADER bfh; + memset(&bfh, 0, sizeof(bfh)); + bfh.bfType = 'MB'; + bfh.bfOffBits = sizeof(bfh) + sizeof(bih); + bfh.bfSize = bfh.bfOffBits + bih.biSizeImage; + bfh.bfReserved1 = bfh.bfReserved2 = 0; + + fwrite(&bfh, 1, sizeof(bfh), fp); + fwrite(&bih, 1, sizeof(bih), fp); + + WORD* buff = (WORD*)_aligned_malloc(sizeof(WORD) * 1024, 16); + DWORD* buff32 = (DWORD*)_aligned_malloc(sizeof(DWORD) * 1024, 
16); + WORD* clut = GetCLUT(); + + for(int j = r.bottom - 1; j >= r.top; j--) + { + WORD* p = &m_mem.m_vm16[(j << 10) + r.left]; + + if(TP == 0) // 4 bpp + { + for(int i = 0, k = r.Width(); i < k; i++) + { + buff[i] = clut[(i & 1) == 0 ? (((BYTE*)p)[i] & 0xf) : (((BYTE*)p)[i] >> 4)]; + } + } + else if(TP == 1) // 8 bpp + { + for(int i = 0, k = r.Width(); i < k; i++) + { + buff[i] = clut[((BYTE*)p)[i]]; + } + } + else if(TP == 2) // 16 bpp; + { + for(int i = 0, k = r.Width(); i < k; i++) + { + buff[i] = p[i]; + } + } + else if(TP == 3) // 24 bpp + { + // TODO + } + + m_mem.Expand16(buff, buff32, r.Width()); + + for(int i = 0, k = r.Width(); i < k; i++) + { + buff32[i] = (buff32[i] & 0xff00ff00) | ((buff32[i] & 0x00ff0000) >> 16) | ((buff32[i] & 0x000000ff) << 16); + } + + fwrite(buff32, 1, r.Width() * 4, fp); + } + + _aligned_free(buff); + _aligned_free(buff32); + + fclose(fp); + } +} + +// + +GPUState::Buffer::Buffer() +{ + bytes = 0; + maxbytes = 4096; + buff = (BYTE*)_aligned_malloc(maxbytes, 16); + cur = 0; +} + +GPUState::Buffer::~Buffer() +{ + _aligned_free(buff); +} + +void GPUState::Buffer::Reserve(int size) +{ + if(size > maxbytes) + { + maxbytes = (maxbytes + size + 1023) & ~1023; + + buff = (BYTE*)_aligned_realloc(buff, maxbytes, 16); + } +} + +void GPUState::Buffer::Append(const BYTE* src, int size) +{ + Reserve(bytes + (int)size); + + memcpy(&buff[bytes], src, size); + + bytes += size; +} + +void GPUState::Buffer::Remove(int size) +{ + ASSERT(size <= bytes); + + if(size < bytes) + { + memmove(&buff[0], &buff[size], bytes - size); + + bytes -= size; + } + else + { + bytes = 0; + } + + #ifdef DEBUG + memset(&buff[bytes], 0xff, maxbytes - bytes); + #endif +} + +void GPUState::Buffer::RemoveAll() +{ + bytes = 0; +} diff --git a/gsdx/GPUState.h b/gsdx/GPUState.h new file mode 100644 index 0000000..9b8a14c --- /dev/null +++ b/gsdx/GPUState.h @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; 
you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPU.h" +#include "GPUDrawingEnvironment.h" +#include "GPULocalMemory.h" +#include "GPUVertex.h" +#include "GSAlignedClass.h" +#include "GSUtil.h" +#include "GSPerfMon.h" + +class GPUState : public GSAlignedClass<16> +{ + typedef void (GPUState::*GPUStatusCommandHandler)(GPUReg* r); + + GPUStatusCommandHandler m_fpGPUStatusCommandHandlers[256]; + + void SCH_Null(GPUReg* r); + void SCH_ResetGPU(GPUReg* r); + void SCH_ResetCommandBuffer(GPUReg* r); + void SCH_ResetIRQ(GPUReg* r); + void SCH_DisplayEnable(GPUReg* r); + void SCH_DMASetup(GPUReg* r); + void SCH_StartOfDisplayArea(GPUReg* r); + void SCH_HorizontalDisplayRange(GPUReg* r); + void SCH_VerticalDisplayRange(GPUReg* r); + void SCH_DisplayMode(GPUReg* r); + void SCH_GPUInfo(GPUReg* r); + + typedef int (GPUState::*GPUPacketHandler)(GPUReg* r, int size); + + GPUPacketHandler m_fpGPUPacketHandler[8]; + + int PH_Command(GPUReg* r, int size); + int PH_Polygon(GPUReg* r, int size); + int PH_Line(GPUReg* r, int size); + int PH_Sprite(GPUReg* r, int size); + int PH_Move(GPUReg* r, int size); + int PH_Write(GPUReg* r, int size); + int PH_Read(GPUReg* r, int size); + int PH_Environment(GPUReg* r, int size); + + class Buffer + { + public: + int bytes; + int maxbytes; + BYTE* buff; + int 
cur; + + public: + Buffer(); + ~Buffer(); + void Reserve(int size); + void Append(const BYTE* src, int size); + void Remove(int size); + void RemoveAll(); + }; + + Buffer m_write; + Buffer m_read; + + void SetPrim(GPUReg* r); + void SetCLUT(GPUReg* r); + void SetTPAGE(GPUReg* r); + +protected: + + int s_n; + + void Dump(LPCTSTR s, UINT32 TP, const CRect& r, int inc = true) + { + if(inc) s_n++; + + //if(s_n < 3000 || s_n > 3100) return; + //if(m_perfmon.GetFrame() < 1000) + //if((m_env.TWIN.ai32 & 0xfffff) == 0) + return; + + int dir = 1; +#ifdef DEBUG + dir = 2; +#endif + CString str; + str.Format(_T("c:\\temp%d\\%04d_%s.bmp"), dir, s_n, s); + SaveBMP(str, TP, r); + } + + void Dump(LPCTSTR s, int inc = true) + { + Dump(s, 2, CRect(0, 0, 1024, 512), inc); + } + + void SaveBMP(LPCTSTR path, UINT32 TP, CRect r); + +public: + GPUDrawingEnvironment m_env; + GPULocalMemory m_mem; + GPUVertex m_v; + GSPerfMon m_perfmon; + UINT32 m_status[256]; + +public: + GPUState(); + virtual ~GPUState(); + + virtual void Reset(); + virtual void Flush(); + virtual void FlushPrim() = 0; + virtual void ResetPrim() = 0; + virtual void VertexKick() = 0; + virtual void Invalidate(const CRect& r) {} + + WORD* GetCLUT() {return &m_mem.m_vm16[(m_env.CLUT.Y << 10) + (m_env.CLUT.X << 4)];} + + void WriteData(const BYTE* mem, UINT32 size); + void ReadData(BYTE* mem, UINT32 size); + + void WriteStatus(UINT32 status); + UINT32 ReadStatus(); + + void Freeze(GPUFreezeData* data); + void Defrost(const GPUFreezeData* data); +}; + diff --git a/gsdx/GPUTextureCacheSW.cpp b/gsdx/GPUTextureCacheSW.cpp new file mode 100644 index 0000000..03fb896 --- /dev/null +++ b/gsdx/GPUTextureCacheSW.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "StdAfx.h" +#include "GPUTextureCacheSW.h" + +GPUTextureCacheSW::GPUTextureCacheSW(GPUState* state) + : m_state(state) +{ + int size = 256 * 256 * (1 + 1 + 4) * 32; + + m_buff[0] = (BYTE*)VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + m_buff[1] = m_buff[0] + 256 * 256 * 32; + m_buff[2] = m_buff[1] + 256 * 256 * 32; + + memset(m_buff[0], 0, size); + + memset(m_valid, 0, sizeof(m_valid)); + + for(int y = 0, offset = 0; y < 2; y++) + { + for(int x = 0; x < 16; x++, offset += 256 * 256) + { + m_texture[0][y][x] = &((BYTE*)m_buff[0])[offset]; + m_texture[1][y][x] = &((BYTE*)m_buff[1])[offset]; + } + } + + for(int y = 0, offset = 0; y < 2; y++) + { + for(int x = 0; x < 16; x++, offset += 256 * 256) + { + m_texture[2][y][x] = &((DWORD*)m_buff[2])[offset]; + } + } +} + +GPUTextureCacheSW::~GPUTextureCacheSW() +{ + VirtualFree(m_buff[0], 0, MEM_RELEASE); +} + +const void* GPUTextureCacheSW::Lookup(const GPURegSTATUS& TPAGE) +{ + if(TPAGE.TP == 3) + { + ASSERT(0); + + return NULL; + } + + void* buff = m_texture[TPAGE.TP][TPAGE.TY][TPAGE.TX]; + + UINT32 flag = 1 << TPAGE.TX; + + if(TPAGE.TY) flag <<= 16; + + if((m_valid[TPAGE.TP] & flag) == 0) + { + int bpp = 0; + + switch(TPAGE.TP) + { + case 0: + m_state->m_mem.ReadPage4(TPAGE.TX, TPAGE.TY, (BYTE*)buff); + bpp = 4; + break; + case 1: + m_state->m_mem.ReadPage8(TPAGE.TX, TPAGE.TY, (BYTE*)buff); + bpp = 8; + break; + case 2: + case 3: + m_state->m_mem.ReadPage16(TPAGE.TX, TPAGE.TY, 
(WORD*)buff); + bpp = 16; + default: + // FIXME: __assume(0); // vc9 generates bogus code in release mode + break; + } + + m_state->m_perfmon.Put(GSPerfMon::Unswizzle, 256 * 256 * bpp >> 3); + + m_valid[TPAGE.TP] |= flag; + } + + return buff; +} + +void GPUTextureCacheSW::Invalidate(const CRect& r) +{ + for(int y = 0, ye = min(r.bottom, 512), j = 0; y < ye; y += 256, j += 16) + { + if(r.top >= y + 256) continue; + + for(int x = 0, xe = min(r.right, 1024), i = 0; x < xe; x += 64, i++) + { + DWORD flag = (1 << i) << j; + + if(r.left >= x + 256) continue; + + m_valid[2] &= ~flag; + + if(r.left >= x + 128) continue; + + m_valid[1] &= ~flag; + + if(r.left >= x + 64) continue; + + m_valid[0] &= ~flag; + } + } +} \ No newline at end of file diff --git a/gsdx/GPUTextureCacheSW.h b/gsdx/GPUTextureCacheSW.h new file mode 100644 index 0000000..0a4df4d --- /dev/null +++ b/gsdx/GPUTextureCacheSW.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPURenderer.h" + +class GPUTextureCacheSW +{ +protected: + GPUState* m_state; + BYTE* m_buff[3]; + void* m_texture[3][2][16]; + DWORD m_valid[3]; + +public: + GPUTextureCacheSW(GPUState* state); + virtual ~GPUTextureCacheSW(); + + const void* Lookup(const GPURegSTATUS& TPAGE); + + void Invalidate(const CRect& r); +}; diff --git a/gsdx/GPUVertex.h b/gsdx/GPUVertex.h new file mode 100644 index 0000000..d4e0cfa --- /dev/null +++ b/gsdx/GPUVertex.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GPU.h" +#include "GSVector.h" + +#pragma pack(push, 1) + +__declspec(align(16)) struct GPUVertex +{ + union + { + struct + { + GPURegRGB RGB; + GPURegXY XY; + GPURegXY UV; + }; + + struct {__m128i m128i;}; + struct {__m128 m128;}; + }; + + GPUVertex() {memset(this, 0, sizeof(*this));} +}; + +struct GPUVertexNull +{ +}; + +#pragma pack(pop) diff --git a/gsdx/GPUVertexSW.h b/gsdx/GPUVertexSW.h new file mode 100644 index 0000000..26bcdd3 --- /dev/null +++ b/gsdx/GPUVertexSW.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSVector.h" + +__declspec(align(16)) union GPUVertexSW +{ + typedef GSVector4 Vector; + + struct {Vector p, c;}; + struct {Vector v[2];}; + struct {float f[8];}; + + GPUVertexSW() {} + GPUVertexSW(const GPUVertexSW& v) {*this = v;} + + void operator = (const GPUVertexSW& v) {c = v.c; p = v.p;} + void operator += (const GPUVertexSW& v) {c += v.c; p += v.p;} + + friend GPUVertexSW operator + (const GPUVertexSW& v1, const GPUVertexSW& v2); + friend GPUVertexSW operator - (const GPUVertexSW& v1, const GPUVertexSW& v2); + friend GPUVertexSW operator * (const GPUVertexSW& v, const Vector& vv); + friend GPUVertexSW operator / (const GPUVertexSW& v, const Vector& vv); + friend GPUVertexSW operator * (const GPUVertexSW& v, float f); + friend GPUVertexSW operator / (const GPUVertexSW& v, float f); +}; + +__forceinline GPUVertexSW operator + (const GPUVertexSW& v1, const GPUVertexSW& v2) +{ + GPUVertexSW v0; + v0.c = v1.c + v2.c; + v0.p = v1.p + v2.p; + return v0; +} + +__forceinline GPUVertexSW operator - (const GPUVertexSW& v1, const GPUVertexSW& v2) +{ + GPUVertexSW v0; + v0.c = v1.c - v2.c; + v0.p = v1.p - v2.p; + return v0; +} + +__forceinline GPUVertexSW operator * (const GPUVertexSW& v, const GPUVertexSW::Vector& vv) +{ + GPUVertexSW v0; + v0.c = v.c * vv; + v0.p = v.p * vv; + return v0; +} + +__forceinline GPUVertexSW operator / (const GPUVertexSW& v, const GPUVertexSW::Vector& vv) +{ + GPUVertexSW v0; + v0.c = v.c / vv; + v0.p = v.p / vv; + return v0; +} + +__forceinline GPUVertexSW operator * (const GPUVertexSW& v, float f) +{ + GPUVertexSW v0; + GPUVertexSW::Vector vf(f); + v0.c = v.c * vf; + v0.p = v.p * vf; + return v0; +} + +__forceinline GPUVertexSW operator / (const GPUVertexSW& v, float f) +{ + GPUVertexSW v0; + GPUVertexSW::Vector vf(f); + v0.c = v.c / vf; + v0.p = v.p / vf; + return v0; +} diff --git a/gsdx/GS.cpp b/gsdx/GS.cpp new file mode 100644 index 0000000..cfc2728 --- 
/dev/null +++ b/gsdx/GS.cpp @@ -0,0 +1,704 @@ +/* + * Copyright (C) 2007 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSUtil.h" +#include "GSRendererHW9.h" +#include "GSRendererHW10.h" +#include "GSRendererSW.h" +#include "GSRendererNull.h" +#include "GSSettingsDlg.h" + +#define PS2E_LT_GS 0x01 +#define PS2E_GS_VERSION 0x0006 +#define PS2E_X86 0x01 // 32 bit +#define PS2E_X86_64 0x02 // 64 bit + +static HRESULT s_hr = E_FAIL; +static GSRendererBase* s_gs = NULL; +static void (*s_irq)() = NULL; +static BYTE* s_basemem = NULL; + +EXPORT_C_(UINT32) PS2EgetLibType() +{ + return PS2E_LT_GS; +} + +EXPORT_C_(char*) PS2EgetLibName() +{ + return GSUtil::GetLibName(); +} + +EXPORT_C_(UINT32) PS2EgetLibVersion2(UINT32 type) +{ + const UINT32 revision = 0; + const UINT32 build = 1; + + return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (PLUGIN_VERSION << 24); +} + +EXPORT_C_(UINT32) PS2EgetCpuPlatform() +{ +#if _M_AMD64 + return PS2E_X86_64; +#else + return PS2E_X86; +#endif +} + +EXPORT_C GSsetBaseMem(BYTE* mem) +{ + s_basemem = mem - 0x12000000; +} + +EXPORT_C_(INT32) GSinit() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + return 0; +} + +EXPORT_C GSshutdown() +{ + 
AFX_MANAGE_STATE(AfxGetStaticModuleState()); +} + +EXPORT_C GSclose() +{ + delete s_gs; + + s_gs = NULL; + + if(SUCCEEDED(s_hr)) + { + ::CoUninitialize(); + + s_hr = E_FAIL; + } +} + +static INT32 GSopen(void* dsp, char* title, int mt, int renderer) +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + if(!GSUtil::CheckDirectX() || !GSUtil::CheckSSE()) + { + return -1; + } + + GSclose(); + + // TODO + + int nloophack = AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2); + + GSRendererSettings rs; + + rs.m_interlace = AfxGetApp()->GetProfileInt(_T("Settings"), _T("interlace"), 0); + rs.m_aspectratio = AfxGetApp()->GetProfileInt(_T("Settings"), _T("aspectratio"), 1); + rs.m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1); + rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); + rs.m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE); + + switch(renderer) + { + default: + case 0: s_gs = new GSRendererHW9(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 1: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 2: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 3: s_gs = new GSRendererHW10(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 4: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 5: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 6: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 7: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; + } + + s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); + + if(!s_gs->Create(CString(title))) + { + GSclose(); + + return -1; + } + + s_gs->m_wnd.Show(); + + *(HWND*)dsp = s_gs->m_wnd; + + // if(mt) _mm_setcsr(MXCSR); + + return 0; +} + +EXPORT_C_(INT32) GSopen(void* dsp, char* title, int mt) +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + int renderer = 
AfxGetApp()->GetProfileInt(_T("Settings"), _T("renderer"), 0); + + return GSopen(dsp, title, mt, renderer); +} + +EXPORT_C GSreset() +{ + s_gs->Reset(); +} + +EXPORT_C GSgifSoftReset(int mask) +{ + s_gs->SoftReset((BYTE)mask); +} + +EXPORT_C GSwriteCSR(UINT32 csr) +{ + s_gs->WriteCSR(csr); +} + +EXPORT_C GSreadFIFO(BYTE* mem) +{ + s_gs->ReadFIFO(mem, 1); +} + +EXPORT_C GSreadFIFO2(BYTE* mem, UINT32 size) +{ + s_gs->ReadFIFO(mem, size); +} + +EXPORT_C GSgifTransfer1(BYTE* mem, UINT32 addr) +{ + s_gs->Transfer<0>(mem + addr, (0x4000 - addr) / 16); +} + +EXPORT_C GSgifTransfer2(BYTE* mem, UINT32 size) +{ + s_gs->Transfer<1>(mem, size); +} + +EXPORT_C GSgifTransfer3(BYTE* mem, UINT32 size) +{ + s_gs->Transfer<2>(mem, size); +} + +EXPORT_C GSvsync(int field) +{ + s_gs->VSync(field); +} + +EXPORT_C_(UINT32) GSmakeSnapshot(char* path) +{ + return s_gs->MakeSnapshot(CString(path) + _T("gsdx")); +} + +EXPORT_C GSkeyEvent(keyEvent* ev) +{ +} + +EXPORT_C_(INT32) GSfreeze(int mode, GSFreezeData* data) +{ + if(mode == FREEZE_SAVE) + { + return s_gs->Freeze(data, false); + } + else if(mode == FREEZE_SIZE) + { + return s_gs->Freeze(data, true); + } + else if(mode == FREEZE_LOAD) + { + return s_gs->Defrost(data); + } + + return 0; +} + +EXPORT_C GSconfigure() +{ + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + GSSettingsDlg dlg; + + if(IDOK == dlg.DoModal()) + { + GSshutdown(); + GSinit(); + } +} + +EXPORT_C_(INT32) GStest() +{ + return 0; + + // TODO + + /* + AFX_MANAGE_STATE(AfxGetStaticModuleState()); + + CComPtr dev; + + return SUCCEEDED(D3D10CreateDevice(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &dev)) ? 
0 : -1;
+	*/
+}
+
+EXPORT_C GSabout()
+{
+}
+
+// Stores the interrupt callback; it is handed to the renderer created by GSopen.
+EXPORT_C GSirqCallback(void (*irq)())
+{
+	s_irq = irq;
+}
+
+EXPORT_C GSsetGameCRC(DWORD crc, int options)
+{
+	s_gs->SetGameCRC(crc, options);
+}
+
+EXPORT_C GSgetLastTag(UINT32* tag)
+{
+	s_gs->GetLastTag(tag);
+}
+
+EXPORT_C GSsetFrameSkip(int frameskip)
+{
+	s_gs->SetFrameSkip(frameskip);
+}
+
+// Replays a dump file in a loop until the output window is no longer visible.
+// lpszCmdLine is "[renderer] path": an optional decimal renderer index followed by
+// the file name. The file starts with a crc, the freeze data and 0x2000 bytes of
+// registers, followed by records tagged 0 (transfer), 1 (vsync), 2 (fifo read)
+// and 3 (register block). Reaching the end of the file rewinds to the first record.
+EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
+{
+	int renderer = -1;
+
+	{
+		char* start = lpszCmdLine;
+		char* end = NULL;
+		long n = strtol(lpszCmdLine, &end, 10);
+		if(end > start) {renderer = n; lpszCmdLine = end;}
+	}
+
+	while(*lpszCmdLine == ' ') lpszCmdLine++;
+
+	::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
+
+	// element type restored: the data pointer is handed to the BYTE* transfer functions
+
+	CAtlArray<BYTE> buff;
+
+	if(FILE* fp = fopen(lpszCmdLine, "rb"))
+	{
+		GSinit();
+
+		BYTE regs[0x2000];
+		GSsetBaseMem(regs);
+
+		HWND hWnd = NULL;
+
+		if(GSopen(&hWnd, _T(""), true, renderer) != 0)
+		{
+			// no renderer was created, every call below would dereference a NULL s_gs
+
+			GSshutdown();
+
+			fclose(fp);
+
+			return;
+		}
+
+		DWORD crc;
+		fread(&crc, 4, 1, fp);
+		GSsetGameCRC(crc, 0);
+
+		GSFreezeData fd;
+		fread(&fd.size, 4, 1, fp);
+		fd.data = new BYTE[fd.size];
+		fread(fd.data, fd.size, 1, fp);
+		GSfreeze(FREEZE_LOAD, &fd);
+		delete [] fd.data;
+
+		fread(regs, 0x2000, 1, fp);
+
+		long start = ftell(fp);
+
+		unsigned int index, size, addr;
+
+		GSvsync(1);
+
+		// The original left this loop with "return", which made the cleanup below
+		// unreachable; a flag lets every exit path release the renderer and the file.
+
+		bool done = false;
+
+		while(!done)
+		{
+			switch(fgetc(fp))
+			{
+			case EOF:
+				fseek(fp, start, 0);
+				if(!IsWindowVisible(hWnd)) done = true;
+				break;
+			case 0:
+				index = fgetc(fp);
+				fread(&size, 4, 1, fp);
+				switch(index)
+				{
+				case 0:
+					if(buff.GetCount() < 0x4000) buff.SetCount(0x4000);
+					addr = 0x4000 - size;
+					fread(buff.GetData() + addr, size, 1, fp);
+					GSgifTransfer1(buff.GetData(), addr);
+					break;
+				case 1:
+					if(buff.GetCount() < size) buff.SetCount(size);
+					fread(buff.GetData(), size, 1, fp);
+					GSgifTransfer2(buff.GetData(), size / 16);
+					break;
+				case 2:
+					if(buff.GetCount() < size) buff.SetCount(size);
+					fread(buff.GetData(), size, 1, fp);
+					GSgifTransfer3(buff.GetData(), size / 16);
+					break;
+				}
+				break;
+			case 1:
+				GSvsync(fgetc(fp));
+				if(!IsWindowVisible(hWnd)) done = true;
+				break;
+			case 2:
+				fread(&size, 4, 1, fp);
+				if(buff.GetCount() < size) buff.SetCount(size);
+				GSreadFIFO2(buff.GetData(), size / 16);
+				break;
+			case 3:
+				fread(regs, 0x2000, 1, fp);
+				break;
+			default:
+				done = true; // unknown record tag
+				break;
+			}
+		}
+
+		GSclose();
+
+		GSshutdown();
+
+		fclose(fp);
+	}
+}
+
+EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow)
+{
+	::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS);
+
+	FILE* file = _tfopen(_T("c:\\log.txt"), _T("a"));
+
+	// every result goes to this file, there is nothing to do without it
+
+	if(file == NULL) return;
+
+	_ftprintf(file, _T("-------------------------\n\n"));
+
+	if(1)
+	{
+		GSLocalMemory mem;
+
+		static struct {int psm; LPCSTR name;} s_format[] =
+		{
+			{PSM_PSMCT32, "32"},
+			{PSM_PSMCT24, "24"},
+			{PSM_PSMCT16, "16"},
+			{PSM_PSMCT16S, "16S"},
+			{PSM_PSMT8, "8"},
+			{PSM_PSMT4, "4"},
+			{PSM_PSMT8H, "8H"},
+			{PSM_PSMT4HL, "4HL"},
+			{PSM_PSMT4HH, "4HH"},
+			{PSM_PSMZ32, "32Z"},
+			{PSM_PSMZ24, "24Z"},
+			{PSM_PSMZ16, "16Z"},
+			{PSM_PSMZ16S, "16ZS"},
+		};
+
+		BYTE* ptr = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16);
+
+		for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (BYTE)i;
+
+		//
+
+		for(int tbw = 5; tbw <= 10; tbw++)
+		{
+			int n = 256 << ((10 - tbw) * 2);
+
+			int w = 1 << tbw;
+			int h = 1 << tbw;
+
+			_ftprintf(file, _T("%d x %d\n\n"), w, h);
+
+			for(int i = 0; i < countof(s_format); i++)
+			{
+				const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[s_format[i].psm];
+
+				GSLocalMemory::writeImage wi = psm.wi;
+				GSLocalMemory::readImage ri = psm.ri;
+				GSLocalMemory::readTexture rtx = psm.rtx;
+				GSLocalMemory::readTexture rtxP = psm.rtxP;
+
+				GIFRegBITBLTBUF BITBLTBUF;
+
+				BITBLTBUF.SBP = 0;
+				BITBLTBUF.SBW = w / 64;
+				BITBLTBUF.SPSM = s_format[i].psm;
+				BITBLTBUF.DBP = 0;
+				BITBLTBUF.DBW = w / 64;
+				BITBLTBUF.DPSM = s_format[i].psm;
+
+				GIFRegTRXPOS TRXPOS;
+
+				TRXPOS.SSAX = 0;
+				TRXPOS.SSAY = 0;
+				TRXPOS.DSAX = 0;
+				TRXPOS.DSAY = 0;
+
+				GIFRegTRXREG TRXREG;
+
+				TRXREG.RRW = w;
+				TRXREG.RRH = h;
+
+				CRect r(0, 0, w, h);
+
+				GIFRegTEX0 TEX0;
+
+				TEX0.TBP0
= 0; + TEX0.TBW = w / 64; + + GIFRegTEXA TEXA; + + TEXA.TA0 = 0; + TEXA.TA1 = 0x80; + TEXA.AEM = 0; + + int trlen = w * h * psm.trbpp / 8; + int len = w * h * psm.bpp / 8; + + clock_t start, end; + + _ftprintf(file, _T("[%4s] "), s_format[i].name); + + start = clock(); + + for(int j = 0; j < n; j++) + { + int x = 0; + int y = 0; + + (mem.*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); + } + + end = clock(); + + _ftprintf(file, _T("%6d %6d | "), (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); + + start = clock(); + + for(int j = 0; j < n; j++) + { + int x = 0; + int y = 0; + + (mem.*ri)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); + } + + end = clock(); + + _ftprintf(file, _T("%6d %6d | "), (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); + + start = clock(); + + for(int j = 0; j < n; j++) + { + (mem.*rtx)(r, ptr, w * 4, TEX0, TEXA); + } + + end = clock(); + + _ftprintf(file, _T("%6d %6d "), (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); + + if(psm.pal > 0) + { + start = clock(); + + for(int j = 0; j < n; j++) + { + (mem.*rtxP)(r, ptr, w, TEX0, TEXA); + } + + end = clock(); + + _ftprintf(file, _T("| %6d %6d "), (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); + } + + _ftprintf(file, _T("\n")); + + fflush(file); + } + + _ftprintf(file, _T("\n")); + } + + _aligned_free(ptr); + } + + if(0) + { + BYTE regs[0x2000]; + GSsetBaseMem(regs); + + HWND hWnd = NULL; + GSopen(&hWnd, _T(""), true, 6); + + s_gs->m_env.COLCLAMP.CLAMP = 1; + s_gs->m_env.PRIM.ABE = 0; + s_gs->m_env.PRIM.FST = 1; + s_gs->m_env.PRIM.TME = 1; + s_gs->m_env.PRIM.IIP = 0; + s_gs->m_env.TEXA.TA0 = 0; + s_gs->m_env.TEXA.TA1 = 0x80; + s_gs->m_env.TEXA.AEM = 0; + s_gs->m_context->ALPHA.A = 0; + s_gs->m_context->ALPHA.B = 1; + s_gs->m_context->ALPHA.C = 0; + s_gs->m_context->ALPHA.D = 1; + 
s_gs->m_context->CLAMP.WMS = 1; + s_gs->m_context->CLAMP.WMT = 1; + s_gs->m_context->CLAMP.MINU = 0; + s_gs->m_context->CLAMP.MINV = 0; + s_gs->m_context->CLAMP.MAXU = 511; + s_gs->m_context->CLAMP.MAXV = 511; + s_gs->m_context->FRAME.FBP = 0 >> 5; + s_gs->m_context->FRAME.FBW = 8; + s_gs->m_context->FRAME.PSM = PSM_PSMCT16S; + s_gs->m_context->SCISSOR.SCAX0 = 0; + s_gs->m_context->SCISSOR.SCAY0 = 0; + s_gs->m_context->SCISSOR.SCAX1 = 511; + s_gs->m_context->SCISSOR.SCAY1 = 511; + s_gs->m_context->TEST.ZTE = 0; + s_gs->m_context->TEST.ZTST = 2; + s_gs->m_context->TEX0.TBP0 = 0x2000; + s_gs->m_context->TEX0.TBW = 8; + s_gs->m_context->TEX0.PSM = PSM_PSMCT32; + s_gs->m_context->TEX0.TFX = 1; + s_gs->m_context->TEX0.TCC = 0; + s_gs->m_context->TEX0.TW = 9; + s_gs->m_context->TEX0.TH = 9; + s_gs->m_context->TEX1.MMAG = 0; + s_gs->m_context->TEX1.MMIN = 0; + s_gs->m_context->ZBUF.ZBP = 0x1000 >> 5; + s_gs->m_context->ZBUF.PSM = PSM_PSMZ24; + + GSRasterizer* ras = ((GSRendererSW*)s_gs)->GetRasterizer(); + + int count = 512 * 512; + + GSVertexSW* vertices = (GSVertexSW*)_aligned_malloc(count * sizeof(GSVertexSW), 16); +/* + // point + + for(int j = 0; j < 512; j++) + { + for(int i = 0; i < 512; i++) + { + GSVertexSW& v = vertices[(j << 7) + i]; + + v.p = GSVector4(i, j, 0, 0); + v.t = GSVector4((float)i + 0.5, (float)j + 0.5, 1.0f, 0.0f); + v.c = GSVector4(128.0f); + } + } + + s_gs->PRIM->PRIM = GS_POINTLIST; + + ras->Draw(vertices, count); + + vertices[0].p = GSVector4(0, 0, 0, 0); + vertices[0].t = GSVector4(0.5, 0.5, 1.0f, 0.0f); + vertices[0].c = GSVector4(128.0f); + vertices[1].p = GSVector4(512, 512, 0, 0); + vertices[1].t = GSVector4(512.5f, 512.5f, 1.0f, 0.0f); + vertices[1].c = GSVector4(128.0f); + + for(int i = 2; i < 512 * 512; i += 2) + { + memcpy(&vertices[i], &vertices[0], sizeof(vertices[0]) * 2); + } + + // sprite + + s_gs->PRIM->PRIM = GS_SPRITE; + + ras->Draw(vertices, count); + + // triangle + + vertices[0].p = GSVector4(0, 0, 0, 0); + vertices[0].t = 
GSVector4(0.5, 0.5, 1.0f, 0.0f); + vertices[0].c = GSVector4(128.0f); + vertices[1].p = GSVector4(512, 0, 0, 0); + vertices[1].t = GSVector4(512.5f, 0.5f, 1.0f, 0.0f); + vertices[1].c = GSVector4(128.0f); + vertices[2].p = GSVector4(512, 512, 0, 0); + vertices[2].t = GSVector4(512.5f, 512.5f, 1.0f, 0.0f); + vertices[2].c = GSVector4(128.0f); + + for(int i = 3; i < 512 * 512 - 2; i += 3) + { + memcpy(&vertices[i], &vertices[0], sizeof(vertices[0]) * 3); + } + + s_gs->PRIM->PRIM = GS_TRIANGLELIST; + + ras->Draw(vertices, 999); +*/ + // + + _aligned_free(vertices); + + GSclose(); + } + + // + + if(0) + { + GSLocalMemory mem; + + BYTE* ptr = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); + + for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (BYTE)i; + + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32]; + + GSLocalMemory::writeImage wi = psm.wi; + + GIFRegBITBLTBUF BITBLTBUF; + + BITBLTBUF.DBP = 0; + BITBLTBUF.DBW = 32; + BITBLTBUF.DPSM = PSM_PSMCT32; + + GIFRegTRXPOS TRXPOS; + + TRXPOS.DSAX = 0; + TRXPOS.DSAY = 1; + + GIFRegTRXREG TRXREG; + + TRXREG.RRW = 256; + TRXREG.RRH = 256; + + int trlen = 256 * 256 * psm.trbpp / 8; + + int x = 0; + int y = 0; + + (mem.*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); + } + + // + + fclose(file); +} + diff --git a/gsdx/GS.h b/gsdx/GS.h index 62a09b4..e6296bc 100644 --- a/gsdx/GS.h +++ b/gsdx/GS.h @@ -26,6 +26,8 @@ #pragma once +#define PLUGIN_VERSION 10 + #include "GSVector.h" #pragma pack(push, 1) @@ -222,6 +224,12 @@ enum GS_AFAIL // sps2regstructs.h // +#define REG32(name) \ +union name \ +{ \ + UINT32 ai32; \ + struct { \ + #define REG64(name) \ union name \ { \ @@ -238,12 +246,18 @@ union name \ UINT32 ai32[4]; \ struct { \ +#define REG32_(prefix, name) REG32(prefix##name) #define REG64_(prefix, name) REG64(prefix##name) #define REG128_(prefix, name) REG128(prefix##name) #define REG_END }; }; #define REG_END2 }; +#define REG32_SET(name) \ +union name \ +{ \ + UINT32 ai32; \ + #define REG64_SET(name) \ union 
name \ { \ @@ -1065,6 +1079,6 @@ enum {KEYPRESS=1, KEYRELEASE=2}; struct keyEvent {UINT32 key, event;}; enum {FREEZE_LOAD=0, FREEZE_SAVE=1, FREEZE_SIZE=2}; -struct freezeData {int size; BYTE* data;}; +struct GSFreezeData {int size; BYTE* data;}; enum stateType {ST_WRITE, ST_TRANSFER, ST_VSYNC}; diff --git a/gsdx/GSClut.cpp b/gsdx/GSClut.cpp index 93817a7..58f41e4 100644 --- a/gsdx/GSClut.cpp +++ b/gsdx/GSClut.cpp @@ -136,6 +136,8 @@ void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE { ASSERT(TEX0.CSA < 16); + GSVector4i dummy; // this just forces stack alignment and enables inlining the next call + WriteCLUT_T32_I4_CSM1(&m_mem->m_vm32[m_mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4)); } diff --git a/gsdx/GSCrc.cpp b/gsdx/GSCrc.cpp index 5bbd881..212e8f7 100644 --- a/gsdx/GSCrc.cpp +++ b/gsdx/GSCrc.cpp @@ -65,6 +65,7 @@ CRC::Game CRC::m_games[] = {0xFE961D28, DBZBT2, US, false}, {0x0393B6BE, DBZBT2, EU, false}, {0xE2F289ED, DBZBT2, JP, false}, // Sparking Neo! 
+ {0x35AA84D1, DBZBT2, Unknown, false}, {0x428113C2, DBZBT3, US, false}, {0xA422BB13, DBZBT3, EU, false}, {0x983c53d2, DBZBT3, Unknown, false}, @@ -82,14 +83,17 @@ CRC::Game CRC::m_games[] = {0x77E61C8A, GT4, Unknown, false}, {0xC164550A, WildArms5, JPUNDUB, false}, {0xC1640D2C, WildArms5, US, false}, + {0x0FCF8FE4, WildArms5, EU, false}, {0x8B029334, Manhunt2, Unknown, false}, {0x09F49E37, CrashBandicootWoC, Unknown, false}, {0x013E349D, ResidentEvil4, US, false}, {0x6BA2F6B9, ResidentEvil4, Unknown, false}, + {0x60FA8C69, ResidentEvil4, JP, false}, {0x72E1E60E, Spartan, Unknown, false}, {0x1B9B7563, AceCombat4, Unknown, false}, {0xEC432B24, Drakengard2, Unknown, false}, {0x1F88EE37, Tekken5, Unknown, false}, + {0x652050D2, Tekken5, Unknown, false}, {0x9E98B8AE, IkkiTousen, JP, false}, {0xD6385328, GodOfWar, US, false}, {0xFB0E6D72, GodOfWar, EU, false}, diff --git a/gsdx/GSDump.cpp b/gsdx/GSDump.cpp index 14e1808..1f372ec 100644 --- a/gsdx/GSDump.cpp +++ b/gsdx/GSDump.cpp @@ -35,7 +35,7 @@ GSDump::~GSDump() } } -void GSDump::Open(LPCTSTR fn, DWORD crc, const freezeData& fd, const void* regs) +void GSDump::Open(LPCTSTR fn, DWORD crc, const GSFreezeData& fd, const void* regs) { m_fp = _tfopen(fn, _T("wb")); m_vsyncs = 0; diff --git a/gsdx/GSDump.h b/gsdx/GSDump.h index 1226f13..275b400 100644 --- a/gsdx/GSDump.h +++ b/gsdx/GSDump.h @@ -51,7 +51,7 @@ public: GSDump(); virtual ~GSDump(); - void Open(LPCTSTR fn, DWORD crc, const freezeData& fd, const void* regs); + void Open(LPCTSTR fn, DWORD crc, const GSFreezeData& fd, const void* regs); void ReadFIFO(UINT32 size); void Transfer(int index, BYTE* mem, size_t size); void VSync(int field, bool last, const void* regs); diff --git a/gsdx/GSRasterizer.cpp b/gsdx/GSRasterizer.cpp index c7d7930..b2d256c 100644 --- a/gsdx/GSRasterizer.cpp +++ b/gsdx/GSRasterizer.cpp @@ -928,147 +928,145 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v) { do { + GSVector4i za = za_base + 
GSVector4i::load(za_offset); + + GSVector4i zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::one(za)); - GSVector4i za = za_base + GSVector4i::load(za_offset); - - GSVector4i zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::one(za)); + GSVector4i test; - GSVector4i test; - - if(!TestZ(zpsm, ztst, zs, za, test)) - { - continue; - } - - int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4))); - - GSVector4 c[12]; - - if(m_sel.tfx != TFX_NONE) - { - GSVector4 u = s; - GSVector4 v = t; - - if(!m_sel.fst) + if(!TestZ(zpsm, ztst, zs, za, test)) { - GSVector4 w = q.rcp(); + continue; + } - u *= w; - v *= w; + int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4))); - if(m_sel.ltf) + GSVector4 c[12]; + + if(m_sel.tfx != TFX_NONE) + { + GSVector4 u = s; + GSVector4 v = t; + + if(!m_sel.fst) { - u -= 0.5f; - v -= 0.5f; + GSVector4 w = q.rcp(); + + u *= w; + v *= w; + + if(m_sel.ltf) + { + u -= 0.5f; + v -= 0.5f; + } + } + + SampleTexture(pixels, ztst, m_sel.ltf, m_sel.tlu, test, u, v, c); + } + + AlphaTFX(m_sel.tfx, m_sel.tcc, a, c[3]); + + GSVector4i fm = m_slenv.fm; + GSVector4i zm = m_slenv.zm; + + if(!TestAlpha(m_sel.atst, m_sel.afail, c[3], fm, zm, test)) + { + continue; + } + + ColorTFX(m_sel.tfx, r, g, b, a, c[0], c[1], c[2]); + + if(m_sel.fge) + { + Fog(f, c[0], c[1], c[2]); + } + + GSVector4i fa = fa_base + GSVector4i::load(fa_offset); + + GSVector4i d = GSVector4i::zero(); + + if(m_sel.rfb) + { + d = ReadFrameX(fpsm == 1 ? 
0 : fpsm, fa); + + if(fpsm != 1 && m_sel.date) + { + test |= (d ^ m_slenv.datm).sra32(31); + + if(test.alltrue()) + { + continue; + } } } - SampleTexture(ztst, test, pixels, m_sel.ltf, m_sel.tlu, u, v, c); - } + fm |= test; + zm |= test; - AlphaTFX(m_sel.tfx, m_sel.tcc, a, c[3]); - - GSVector4i fm = m_slenv.fm; - GSVector4i zm = m_slenv.zm; - - if(!TestAlpha(m_sel.atst, m_sel.afail, c[3], fm, zm, test)) - { - continue; - } - - ColorTFX(m_sel.tfx, r, g, b, a, c[0], c[1], c[2]); - - if(m_sel.fge) - { - Fog(f, c[0], c[1], c[2]); - } - - GSVector4i fa = fa_base + GSVector4i::load(fa_offset); - - GSVector4i d = GSVector4i::zero(); - - if(m_sel.rfb) - { - d = ReadFrameX(fpsm == 1 ? 0 : fpsm, fa); - - if(fpsm != 1 && m_sel.date) + if(m_sel.abe != 255) { - test |= (d ^ m_slenv.datm).sra32(31); + // GSVector4::expand(d, c[4], c[5], c[6], c[7]); - if(test.alltrue()) + c[4] = (d << 24) >> 24; + c[5] = (d << 16) >> 24; + c[6] = (d << 8) >> 24; + c[7] = (d >> 24); + + if(fpsm == 1) { - continue; + c[7] = GSVector4(128.0f); + } + + c[8] = GSVector4::zero(); + c[9] = GSVector4::zero(); + c[10] = GSVector4::zero(); + c[11] = m_slenv.afix; + + DWORD abea = m_sel.abea; + DWORD abeb = m_sel.abeb; + DWORD abec = m_sel.abec; + DWORD abed = m_sel.abed; + + GSVector4 r = (c[abea*4 + 0] - c[abeb*4 + 0]).mod2x(c[abec*4 + 3]) + c[abed*4 + 0]; + GSVector4 g = (c[abea*4 + 1] - c[abeb*4 + 1]).mod2x(c[abec*4 + 3]) + c[abed*4 + 1]; + GSVector4 b = (c[abea*4 + 2] - c[abeb*4 + 2]).mod2x(c[abec*4 + 3]) + c[abed*4 + 2]; + + if(m_sel.pabe) + { + GSVector4 mask = c[3] >= GSVector4(128.0f); + + c[0] = c[0].blend8(r, mask); + c[1] = c[1].blend8(g, mask); + c[2] = c[2].blend8(b, mask); + } + else + { + c[0] = r; + c[1] = g; + c[2] = b; } } - } - fm |= test; - zm |= test; + GSVector4i rb = GSVector4i(c[0]).ps32(GSVector4i(c[2])); + GSVector4i ga = GSVector4i(c[1]).ps32(GSVector4i(c[3])); + + GSVector4i rg = rb.upl16(ga) & m_slenv.colclamp; + GSVector4i ba = rb.uph16(ga) & m_slenv.colclamp; + + GSVector4i 
s = rg.upl32(ba).pu16(rg.uph32(ba)); - if(m_sel.abe != 255) - { -// GSVector4::expand(d, c[4], c[5], c[6], c[7]); - - c[4] = (d << 24) >> 24; - c[5] = (d << 16) >> 24; - c[6] = (d << 8) >> 24; - c[7] = (d >> 24); - - if(fpsm == 1) + if(fpsm != 1) { - c[7] = GSVector4(128.0f); + s |= m_slenv.fba; } - c[8] = GSVector4::zero(); - c[9] = GSVector4::zero(); - c[10] = GSVector4::zero(); - c[11] = m_slenv.afix; - - DWORD abea = m_sel.abea; - DWORD abeb = m_sel.abeb; - DWORD abec = m_sel.abec; - DWORD abed = m_sel.abed; - - GSVector4 r = (c[abea*4 + 0] - c[abeb*4 + 0]).mod2x(c[abec*4 + 3]) + c[abed*4 + 0]; - GSVector4 g = (c[abea*4 + 1] - c[abeb*4 + 1]).mod2x(c[abec*4 + 3]) + c[abed*4 + 1]; - GSVector4 b = (c[abea*4 + 2] - c[abeb*4 + 2]).mod2x(c[abec*4 + 3]) + c[abed*4 + 2]; - - if(m_sel.pabe) + if(m_sel.rfb) { - GSVector4 mask = c[3] >= GSVector4(128.0f); - - c[0] = c[0].blend8(r, mask); - c[1] = c[1].blend8(g, mask); - c[2] = c[2].blend8(b, mask); + s = s.blend(d, fm); } - else - { - c[0] = r; - c[1] = g; - c[2] = b; - } - } - - GSVector4i rb = GSVector4i(c[0]).ps32(GSVector4i(c[2])); - GSVector4i ga = GSVector4i(c[1]).ps32(GSVector4i(c[3])); - - GSVector4i rg = rb.upl16(ga) & m_slenv.colclamp; - GSVector4i ba = rb.uph16(ga) & m_slenv.colclamp; - - GSVector4i s = rg.upl32(ba).pu16(rg.uph32(ba)); - - if(fpsm != 1) - { - s |= m_slenv.fba; - } - - if(m_sel.rfb) - { - s = s.blend(d, fm); - } - - WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels); + WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? 
zpsm : 3, za, zm, zs, pixels); } while(0); @@ -1099,7 +1097,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v) } } -void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, DWORD tlu, const GSVector4& u, const GSVector4& v, GSVector4* c) +void GSRasterizer::SampleTexture(int pixels, DWORD ztst, DWORD ltf, DWORD tlu, const GSVector4i& test, const GSVector4& u, const GSVector4& v, GSVector4* c) { const void* RESTRICT tex = m_slenv.tex; const DWORD* RESTRICT pal = m_slenv.pal; diff --git a/gsdx/GSRasterizer.h b/gsdx/GSRasterizer.h index d8d4aa2..a7894e3 100644 --- a/gsdx/GSRasterizer.h +++ b/gsdx/GSRasterizer.h @@ -43,36 +43,6 @@ private: DWORD hash; }; - __declspec(align(16)) struct ScanlineEnvironment - { - int steps; - - void* vm; - - const void* tex; - const DWORD* pal; - DWORD tw; - - GSVector4i* fbr; - GSVector4i* zbr; - int** fbc; - int** zbc; - - GSVector4i fm, zm; - struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4 - GSVector4i datm; - GSVector4i colclamp; - GSVector4i fba; - GSVector4i aref; - GSVector4 afix; - GSVector4 afix2; - GSVector4 fc; - - GSVector4 dp, dp4; - GSVector4 dt, dt4; - GSVector4 dc, dc4; - }; - union ScanlineSelector { struct @@ -111,6 +81,36 @@ private: operator DWORD() {return dw;}// & 0x7fffffff;} }; + __declspec(align(16)) struct ScanlineEnvironment + { + int steps; + + void* vm; + + const void* tex; + const DWORD* pal; + DWORD tw; + + GSVector4i* fbr; + GSVector4i* zbr; + int** fbc; + int** zbc; + + GSVector4i fm, zm; + struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4 + GSVector4i datm; + GSVector4i colclamp; + GSVector4i fba; + GSVector4i aref; + GSVector4 afix; + GSVector4 afix2; + GSVector4 fc; + + GSVector4 dp, dp4; + GSVector4 dt, dt4; + GSVector4 dc, dc4; + }; + GSVector4i m_scissor; CRBMapC m_comap; ColumnOffset* m_fbco; @@ -137,7 +137,7 @@ private: template void DrawScanlineEx(int top, int left, int right, const Vertex& v); - __forceinline 
void SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, DWORD pal, const GSVector4& u, const GSVector4& v, GSVector4* c); + __forceinline void SampleTexture(int pixels, DWORD ztst, DWORD ltf, DWORD pal, const GSVector4i& test, const GSVector4& u, const GSVector4& v, GSVector4* c); __forceinline void ColorTFX(DWORD tfx, const GSVector4& rf, const GSVector4& gf, const GSVector4& bf, const GSVector4& af, GSVector4& rt, GSVector4& gt, GSVector4& bt); __forceinline void AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4& af, GSVector4& at); __forceinline void Fog(const GSVector4& f, GSVector4& r, GSVector4& g, GSVector4& b); diff --git a/gsdx/GSRasterizerEx.cpp b/gsdx/GSRasterizerEx.cpp index d4414c0..ed163ea 100644 --- a/gsdx/GSRasterizerEx.cpp +++ b/gsdx/GSRasterizerEx.cpp @@ -23,7 +23,7 @@ #include "GSRasterizer.h" void GSRasterizer::InitEx() -{ +{/* // ffx m_dsmap.SetAt(0x2420c265, &GSRasterizer::DrawScanlineEx<0x2420c265>); @@ -1245,6 +1245,7 @@ void GSRasterizer::InitEx() m_dsmap.SetAt(0xa4802c09, &GSRasterizer::DrawScanlineEx<0xa4802c09>); m_dsmap.SetAt(0xa485bc29, &GSRasterizer::DrawScanlineEx<0xa485bc29>); m_dsmap.SetAt(0xe441bc29, &GSRasterizer::DrawScanlineEx<0xe441bc29>); +*/ /* // dmc (fixme) @@ -1321,189 +1322,187 @@ void GSRasterizer::DrawScanlineEx(int top, int left, int right, const Vertex& v) { do { + GSVector4i za = za_base + GSVector4i::load(za_offset); + + GSVector4i zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::one(za)); - GSVector4i za = za_base + GSVector4i::load(za_offset); - - GSVector4i zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::one(za)); + GSVector4i test; - GSVector4i test; - - if(!TestZ(zpsm, ztst, zs, za, test)) - { - continue; - } - - int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4))); - - GSVector4 c[12]; - - if(tfx != TFX_NONE) - { - GSVector4 u = s; - GSVector4 v = t; - - if(!fst) + if(!TestZ(zpsm, ztst, zs, za, test)) { - GSVector4 w = q.rcp(); 
- - u *= w; - v *= w; - - if(ltf) - { - u -= 0.5f; - v -= 0.5f; - } + continue; } - SampleTexture(ztst, test, pixels, ltf, tlu, u, v, c); - } + int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4))); - AlphaTFX(tfx, tcc, a, c[3]); + GSVector4 c[12]; - GSVector4i fm = m_slenv.fm; - GSVector4i zm = m_slenv.zm; - - if(!TestAlpha(atst, afail, c[3], fm, zm, test)) - { - continue; - } - - ColorTFX(tfx, r, g, b, a, c[0], c[1], c[2]); - - if(fge) - { - Fog(f, c[0], c[1], c[2]); - } - - GSVector4i fa = fa_base + GSVector4i::load(fa_offset); - - GSVector4i d = GSVector4i::zero(); - - if(rfb) - { - d = ReadFrameX(fpsm == 1 ? 0 : fpsm, fa); - - if(fpsm != 1 && date) + if(tfx != TFX_NONE) { - test |= (d ^ m_slenv.datm).sra32(31); + GSVector4 u = s; + GSVector4 v = t; - if(test.alltrue()) + if(!fst) { - continue; - } - } - } + GSVector4 w = q.rcp(); - fm |= test; - zm |= test; + u *= w; + v *= w; - if(abe != 255) - { -// GSVector4::expand(d, c[4], c[5], c[6], c[7]); - - c[4] = (d << 24) >> 24; - c[5] = (d << 16) >> 24; - c[6] = (d << 8) >> 24; - c[7] = (d >> 24); - - if(fpsm == 1) - { - c[7] = GSVector4(128.0f); - } - - c[8] = GSVector4::zero(); - c[9] = GSVector4::zero(); - c[10] = GSVector4::zero(); - c[11] = m_slenv.afix; - - /* - GSVector4 r = (c[abea*4 + 0] - c[abeb*4 + 0]).mod2x(c[abec*4 + 3]) + c[abed*4 + 0]; - GSVector4 g = (c[abea*4 + 1] - c[abeb*4 + 1]).mod2x(c[abec*4 + 3]) + c[abed*4 + 1]; - GSVector4 b = (c[abea*4 + 2] - c[abeb*4 + 2]).mod2x(c[abec*4 + 3]) + c[abed*4 + 2]; - */ - - GSVector4 r, g, b; - - if(abea != abeb) - { - r = c[abea*4 + 0]; - g = c[abea*4 + 1]; - b = c[abea*4 + 2]; - - if(abeb != 2) - { - r -= c[abeb*4 + 0]; - g -= c[abeb*4 + 1]; - b -= c[abeb*4 + 2]; - } - - if(!(fpsm == 1 && abec == 1)) - { - if(abec == 2) + if(ltf) { - r *= m_slenv.afix2; - g *= m_slenv.afix2; - b *= m_slenv.afix2; - } - else - { - r = r.mod2x(c[abec*4 + 3]); - g = g.mod2x(c[abec*4 + 3]); - b = b.mod2x(c[abec*4 + 3]); + u -= 0.5f; + v -= 0.5f; } 
} - if(abed < 2) + SampleTexture(ztst, test, pixels, ltf, tlu, u, v, c); + } + + AlphaTFX(tfx, tcc, a, c[3]); + + GSVector4i fm = m_slenv.fm; + GSVector4i zm = m_slenv.zm; + + if(!TestAlpha(atst, afail, c[3], fm, zm, test)) + { + continue; + } + + ColorTFX(tfx, r, g, b, a, c[0], c[1], c[2]); + + if(fge) + { + Fog(f, c[0], c[1], c[2]); + } + + GSVector4i fa = fa_base + GSVector4i::load(fa_offset); + + GSVector4i d = GSVector4i::zero(); + + if(rfb) + { + d = ReadFrameX(fpsm == 1 ? 0 : fpsm, fa); + + if(fpsm != 1 && date) { - r += c[abed*4 + 0]; - g += c[abed*4 + 1]; - b += c[abed*4 + 2]; + test |= (d ^ m_slenv.datm).sra32(31); + + if(test.alltrue()) + { + continue; + } } } - else + + fm |= test; + zm |= test; + + if(abe != 255) { - r = c[abed*4 + 0]; - g = c[abed*4 + 1]; - b = c[abed*4 + 2]; + // GSVector4::expand(d, c[4], c[5], c[6], c[7]); + + c[4] = (d << 24) >> 24; + c[5] = (d << 16) >> 24; + c[6] = (d << 8) >> 24; + c[7] = (d >> 24); + + if(fpsm == 1) + { + c[7] = GSVector4(128.0f); + } + + c[8] = GSVector4::zero(); + c[9] = GSVector4::zero(); + c[10] = GSVector4::zero(); + c[11] = m_slenv.afix; + + /* + GSVector4 r = (c[abea*4 + 0] - c[abeb*4 + 0]).mod2x(c[abec*4 + 3]) + c[abed*4 + 0]; + GSVector4 g = (c[abea*4 + 1] - c[abeb*4 + 1]).mod2x(c[abec*4 + 3]) + c[abed*4 + 1]; + GSVector4 b = (c[abea*4 + 2] - c[abeb*4 + 2]).mod2x(c[abec*4 + 3]) + c[abed*4 + 2]; + */ + + GSVector4 r, g, b; + + if(abea != abeb) + { + r = c[abea*4 + 0]; + g = c[abea*4 + 1]; + b = c[abea*4 + 2]; + + if(abeb != 2) + { + r -= c[abeb*4 + 0]; + g -= c[abeb*4 + 1]; + b -= c[abeb*4 + 2]; + } + + if(!(fpsm == 1 && abec == 1)) + { + if(abec == 2) + { + r *= m_slenv.afix2; + g *= m_slenv.afix2; + b *= m_slenv.afix2; + } + else + { + r = r.mod2x(c[abec*4 + 3]); + g = g.mod2x(c[abec*4 + 3]); + b = b.mod2x(c[abec*4 + 3]); + } + } + + if(abed < 2) + { + r += c[abed*4 + 0]; + g += c[abed*4 + 1]; + b += c[abed*4 + 2]; + } + } + else + { + r = c[abed*4 + 0]; + g = c[abed*4 + 1]; + b = c[abed*4 + 2]; + } 
+ + if(pabe) + { + GSVector4 mask = c[3] >= GSVector4(128.0f); + + c[0] = c[0].blend8(r, mask); + c[1] = c[1].blend8(g, mask); + c[2] = c[2].blend8(b, mask); + } + else + { + c[0] = r; + c[1] = g; + c[2] = b; + } } - if(pabe) + GSVector4i rb = GSVector4i(c[0]).ps32(GSVector4i(c[2])); + GSVector4i ga = GSVector4i(c[1]).ps32(GSVector4i(c[3])); + + GSVector4i rg = rb.upl16(ga) & m_slenv.colclamp; + GSVector4i ba = rb.uph16(ga) & m_slenv.colclamp; + + GSVector4i s = rg.upl32(ba).pu16(rg.uph32(ba)); + + if(fpsm != 1) { - GSVector4 mask = c[3] >= GSVector4(128.0f); - - c[0] = c[0].blend8(r, mask); - c[1] = c[1].blend8(g, mask); - c[2] = c[2].blend8(b, mask); + s |= m_slenv.fba; } - else + + if(rfb) { - c[0] = r; - c[1] = g; - c[2] = b; + s = s.blend(d, fm); } - } - - GSVector4i rb = GSVector4i(c[0]).ps32(GSVector4i(c[2])); - GSVector4i ga = GSVector4i(c[1]).ps32(GSVector4i(c[3])); - - GSVector4i rg = rb.upl16(ga) & m_slenv.colclamp; - GSVector4i ba = rb.uph16(ga) & m_slenv.colclamp; - - GSVector4i s = rg.upl32(ba).pu16(rg.uph32(ba)); - - if(fpsm != 1) - { - s |= m_slenv.fba; - } - - if(rfb) - { - s = s.blend(d, fm); - } - - WriteFrameAndZBufX(fpsm == 1 && rfb ? 0 : fpsm, fa, fm, s, wzb ? zpsm : 3, za, zm, zs, pixels); + WriteFrameAndZBufX(fpsm == 1 && rfb ? 0 : fpsm, fa, fm, s, wzb ? 
zpsm : 3, za, zm, zs, pixels); } while(0); diff --git a/gsdx/GSRenderer.h b/gsdx/GSRenderer.h index 1bc702d..be39217 100644 --- a/gsdx/GSRenderer.h +++ b/gsdx/GSRenderer.h @@ -89,6 +89,9 @@ protected: return false; } +public: + GSWnd m_wnd; + public: GSRendererBase(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) : GSState(base, mt, irq, nloophack) @@ -105,8 +108,6 @@ public: virtual bool Create(LPCTSTR title) = 0; virtual void VSync(int field) = 0; virtual bool MakeSnapshot(LPCTSTR path) = 0; - - GSWnd m_wnd; }; template class GSRenderer : public GSRendererBase @@ -445,7 +446,7 @@ public: if((::GetAsyncKeyState(VK_SHIFT) & 0x8000) && !m_dump) { - freezeData fd; + GSFreezeData fd; fd.size = 0; fd.data = NULL; Freeze(&fd, true); diff --git a/gsdx/GSRendererSW.h b/gsdx/GSRendererSW.h index 4d3fe29..f9eeb3d 100644 --- a/gsdx/GSRendererSW.h +++ b/gsdx/GSRendererSW.h @@ -120,7 +120,7 @@ protected: void VertexKick(bool skip) { - GSVertexSW& v = m_vl.AddTail(); + Vertex& v = m_vl.AddTail(); int x = (int)m_v.XYZ.X - (int)m_context->XYOFFSET.OFX; int y = (int)m_v.XYZ.Y - (int)m_context->XYOFFSET.OFY; diff --git a/gsdx/GSState.cpp b/gsdx/GSState.cpp index 64080f6..ff28479 100644 --- a/gsdx/GSState.cpp +++ b/gsdx/GSState.cpp @@ -1412,7 +1412,7 @@ template static void ReadState(T* dst, BYTE*& src, size_t len = sizeof( src += len; } -int GSState::Freeze(freezeData* fd, bool sizeonly) +int GSState::Freeze(GSFreezeData* fd, bool sizeonly) { if(sizeonly) { @@ -1484,7 +1484,7 @@ int GSState::Freeze(freezeData* fd, bool sizeonly) return 0; } -int GSState::Defrost(const freezeData* fd) +int GSState::Defrost(const GSFreezeData* fd) { if(!fd || !fd->data || fd->size == 0) { diff --git a/gsdx/GSState.h b/gsdx/GSState.h index 068dbe9..b1b7409 100644 --- a/gsdx/GSState.h +++ b/gsdx/GSState.h @@ -147,7 +147,7 @@ public: GSDrawingContext* m_context; GSVertex m_v; float m_q; - int m_vprim; + DWORD m_vprim; GSPerfMon m_perfmon; bool m_nloophack; @@ -197,8 
+197,8 @@ public: void WriteCSR(UINT32 csr) {CSR->ai32[1] = csr;} void ReadFIFO(BYTE* mem, int size); template void Transfer(BYTE* mem, UINT32 size); - int Freeze(freezeData* fd, bool sizeonly); - int Defrost(const freezeData* fd); + int Freeze(GSFreezeData* fd, bool sizeonly); + int Defrost(const GSFreezeData* fd); void GetLastTag(UINT32* tag) {*tag = m_path3hack; m_path3hack = 0;} virtual void SetGameCRC(DWORD crc, int options); void SetFrameSkip(int frameskip); diff --git a/gsdx/GSUtil.cpp b/gsdx/GSUtil.cpp index 2fca96d..2e2ed0b 100644 --- a/gsdx/GSUtil.cpp +++ b/gsdx/GSUtil.cpp @@ -22,6 +22,7 @@ #include "stdafx.h" #include "GS.h" #include "GSUtil.h" +#include "svnrev.h" static struct GSUtilMaps { @@ -81,15 +82,16 @@ static struct GSUtilMaps } s_maps; -int GSUtil::GetPrimClass(DWORD prim) +DWORD GSUtil::GetPrimClass(DWORD prim) { return s_maps.PrimClassField[prim]; } -int GSUtil::GetPrimVertexCount(DWORD prim) +DWORD GSUtil::GetPrimVertexCount(DWORD prim) { return s_maps.PrimVertexCount[prim]; } + bool GSUtil::HasSharedBits(DWORD spsm, DWORD dpsm) { return s_maps.SharedBitsField[spsm][dpsm]; @@ -124,4 +126,115 @@ bool GSUtil::IsRectInRectV(const CRect& inner, const CRect& outer) return outer.left <= inner.left && inner.right <= outer.right; } +bool GSUtil::CheckDirectX() +{ + CString str; + str.Format(_T("d3dx9_%d.dll"), D3DX_SDK_VERSION); + + if(HINSTANCE hDll = LoadLibrary(str)) + { + FreeLibrary(hDll); + } + else + { + int res = AfxMessageBox(_T("Please update DirectX!\n\nWould you like to open the download page in your browser?"), MB_YESNO); + + if(res == IDYES) + { + ShellExecute(NULL, _T("open"), _T("http://www.microsoft.com/downloads/details.aspx?FamilyId=2DA43D38-DB71-4C1B-BC6A-9B6652CD92A3"), NULL, NULL, SW_SHOWNORMAL); + } + + return false; + } + + return true; +} + +static bool _CheckSSE() +{ + __try + { + static __m128i m; + + #if _M_SSE >= 0x402 + m.m128i_i32[0] = _mm_popcnt_u32(1234); + #elif _M_SSE >= 0x401 + m = _mm_packus_epi32(m, m); + #elif 
_M_SSE >= 0x301 + m = _mm_alignr_epi8(m, m, 1); + #elif _M_SSE >= 0x200 + m = _mm_packs_epi32(m, m); + #endif + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + + return true; +} + +bool GSUtil::CheckSSE() +{ + if(!_CheckSSE()) + { + CString str; + str.Format(_T("This CPU does not support SSE %d.%02d"), _M_SSE >> 8, _M_SSE & 0xff); + AfxMessageBox(str, MB_OK); + + return false; + } + + return true; +} + +char* GSUtil::GetLibName() +{ + CString str; + + str.Format(_T("GSdx %d"), SVN_REV); + + if(SVN_MODS) str += _T("m"); + +#if _M_AMD64 + str += _T(" 64-bit"); +#endif + + CAtlList sl; + +#ifdef __INTEL_COMPILER + CString s; + s.Format(_T("Intel C++ %d.%02d"), __INTEL_COMPILER/100, __INTEL_COMPILER%100); + sl.AddTail(s); +#elif _MSC_VER + CString s; + s.Format(_T("MSVC %d.%02d"), _MSC_VER/100, _MSC_VER%100); + sl.AddTail(s); +#endif + +#if _M_SSE >= 0x402 + sl.AddTail(_T("SSE42")); +#elif _M_SSE >= 0x401 + sl.AddTail(_T("SSE41")); +#elif _M_SSE >= 0x301 + sl.AddTail(_T("SSSE3")); +#elif _M_SSE >= 0x200 + sl.AddTail(_T("SSE2")); +#elif _M_SSE >= 0x100 + sl.AddTail(_T("SSE")); +#endif + + POSITION pos = sl.GetHeadPosition(); + + while(pos) + { + if(pos == sl.GetHeadPosition()) str += _T(" ("); + str += sl.GetNext(pos); + str += pos ? 
_T(", ") : _T(")"); + } + + static char buff[256]; + strncpy(buff, CStringA(str), min(countof(buff)-1, str.GetLength())); + return buff; +} \ No newline at end of file diff --git a/gsdx/GSUtil.h b/gsdx/GSUtil.h index f95432e..f9277b5 100644 --- a/gsdx/GSUtil.h +++ b/gsdx/GSUtil.h @@ -21,44 +21,14 @@ #pragma once +#include "GS.h" + class GSUtil { public: - static int GetPrimClass(DWORD prim); - static int GetPrimVertexCount(DWORD prim); - /* - static int GetPrimClass(DWORD prim) - { - switch(prim) - { - case GS_POINTLIST: return 0; - case GS_LINELIST: return 1; - case GS_LINESTRIP: return 1; - case GS_TRIANGLELIST: return 2; - case GS_TRIANGLESTRIP: return 2; - case GS_TRIANGLEFAN: return 2; - case GS_SPRITE: return 3; - case GS_INVALID: return -1; - default: __assume(0); - } - } + static DWORD GetPrimClass(DWORD prim); + static DWORD GetPrimVertexCount(DWORD prim); - static int GetPrimVertexCount(DWORD prim) - { - switch(prim) - { - case GS_POINTLIST: return 1; - case GS_LINELIST: return 2; - case GS_LINESTRIP: return 2; - case GS_TRIANGLELIST: return 3; - case GS_TRIANGLESTRIP: return 3; - case GS_TRIANGLEFAN: return 3; - case GS_SPRITE: return 2; - case GS_INVALID: return 1; - default: __assume(0); - } - } - */ static bool HasSharedBits(DWORD spsm, DWORD dpsm); static bool HasSharedBits(DWORD sbp, DWORD spsm, DWORD dbp, DWORD dpsm); static bool HasCompatibleBits(DWORD spsm, DWORD dpsm); @@ -86,5 +56,10 @@ public: return 3; } } + + static bool CheckDirectX(); + static bool CheckSSE(); + + static char* GetLibName(); }; diff --git a/gsdx/GSVector.cpp b/gsdx/GSVector.cpp index 98da0cf..a2ee69a 100644 --- a/gsdx/GSVector.cpp +++ b/gsdx/GSVector.cpp @@ -23,6 +23,7 @@ #include "GSVector.h" const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f); +const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f); void GSVector4::operator = (const GSVector4i& v) { @@ -34,3 +35,12 @@ void GSVector4i::operator = (const GSVector4& v) m = _mm_cvttps_epi32(v); } +GSVector4i 
GSVector4i::cast(const GSVector4& v) +{ + return GSVector4i(_mm_castps_si128(v.m)); +} + +GSVector4 GSVector4::cast(const GSVector4i& v) +{ + return GSVector4(_mm_castsi128_ps(v.m)); +} diff --git a/gsdx/GSVector.h b/gsdx/GSVector.h index fc20af3..546434c 100644 --- a/gsdx/GSVector.h +++ b/gsdx/GSVector.h @@ -174,6 +174,8 @@ public: #endif } + static GSVector4i cast(const GSVector4& v); + #if _M_SSE >= 0x401 GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const @@ -605,6 +607,51 @@ public: #endif + GSVector4i eq8(const GSVector4i& v) const + { + return GSVector4i(_mm_cmpeq_epi8(m, v.m)); + } + + GSVector4i eq16(const GSVector4i& v) const + { + return GSVector4i(_mm_cmpeq_epi16(m, v.m)); + } + + GSVector4i eq32(const GSVector4i& v) const + { + return GSVector4i(_mm_cmpeq_epi32(m, v.m)); + } + + GSVector4i gt8(const GSVector4i& v) const + { + return GSVector4i(_mm_cmpgt_epi8(m, v.m)); + } + + GSVector4i gt16(const GSVector4i& v) const + { + return GSVector4i(_mm_cmpgt_epi16(m, v.m)); + } + + GSVector4i gt32(const GSVector4i& v) const + { + return GSVector4i(_mm_cmpgt_epi32(m, v.m)); + } + + GSVector4i lt8(const GSVector4i& v) const + { + return GSVector4i(_mm_cmplt_epi8(m, v.m)); + } + + GSVector4i lt16(const GSVector4i& v) const + { + return GSVector4i(_mm_cmplt_epi16(m, v.m)); + } + + GSVector4i lt32(const GSVector4i& v) const + { + return GSVector4i(_mm_cmplt_epi32(m, v.m)); + } + GSVector4i andnot(const GSVector4i& v) const { return GSVector4i(_mm_andnot_si128(v.m, m)); @@ -1105,6 +1152,11 @@ public: return invzero().srl16(15); } + static GSVector4i x00ff() + { + return invzero().srl16(8); + } + static GSVector4i x000000ff() { return invzero().srl32(24); @@ -1130,6 +1182,11 @@ public: return invzero().srl32(18); } + static GSVector4i x00007fff() + { + return invzero().srl32(17); + } + static GSVector4i invzero(const GSVector4i& v) { // - vc can't generate a simple pxor xmm0, xmm0 / pcmpeqd xmm0, xmm0 @@ -1148,6 +1205,11 @@ public: return 
invzero(v).srl16(15); } + static GSVector4i x00ff(const GSVector4i& v) + { + return invzero(v).srl16(8); + } + static GSVector4i x000000ff(const GSVector4i& v) { return invzero(v).srl32(24); @@ -1173,6 +1235,11 @@ public: return invzero(v).srl32(18); } + static GSVector4i x00007fff(const GSVector4i& v) + { + return invzero(v).srl32(17); + } + #if _M_SSE >= 0x401 static GSVector4i loadnt(const void* p) @@ -1596,6 +1663,7 @@ public: }; static const GSVector4 m_ps0123; + static const GSVector4 m_ps4567; GSVector4() { @@ -1688,6 +1756,8 @@ public: return GSVector4i(*this).rgba64(); } + static GSVector4 cast(const GSVector4i& v); + GSVector4 abs() const { return GSVector4(_mm_abs_ps(m)); @@ -1874,6 +1944,11 @@ public: return GSVector4(m_ps0123); } + static GSVector4 ps4567() + { + return GSVector4(m_ps4567); + } + static GSVector4 loadl(const void* p) { return GSVector4(_mm_castpd_ps(_mm_load_sd((double*)p))); diff --git a/gsdx/GSVertexList.h b/gsdx/GSVertexList.h index 136002f..29bc83a 100644 --- a/gsdx/GSVertexList.h +++ b/gsdx/GSVertexList.h @@ -25,7 +25,7 @@ template class GSVertexList { void* m_base; Vertex* m_v[3]; - int m_count; + DWORD m_count; public: GSVertexList() @@ -80,7 +80,7 @@ public: v = *m_v[i]; } - int GetCount() + DWORD GetCount() { return m_count; } diff --git a/gsdx/GSdx.cpp b/gsdx/GSdx.cpp index 4c5696c..3cbaef9 100644 --- a/gsdx/GSdx.cpp +++ b/gsdx/GSdx.cpp @@ -21,12 +21,8 @@ #include "stdafx.h" #include "GSdx.h" -#include "GSRendererHW9.h" -#include "GSRendererHW10.h" -#include "GSRendererSW.h" -#include "GSRendererNull.h" -#include "GSSettingsDlg.h" -#include "svnrev.h" + +#define PLUGIN_VERSION 10 // // Note! 
@@ -100,781 +96,4 @@ BOOL GSdxApp::InitInstance() m_pszProfileName = _tcsdup((LPCTSTR)path); return TRUE; -} - -static bool CheckSSE() -{ - __try - { - static __m128i m; - - #if _M_SSE >= 0x402 - m.m128i_i32[0] = _mm_popcnt_u32(1234); - #elif _M_SSE >= 0x401 - m = _mm_packus_epi32(m, m); - #elif _M_SSE >= 0x301 - m = _mm_alignr_epi8(m, m, 1); - #elif _M_SSE >= 0x200 - m = _mm_packs_epi32(m, m); - #endif - } - __except(EXCEPTION_EXECUTE_HANDLER) - { - return false; - } - - return true; -} - -// - -#define PS2E_LT_GS 0x01 -#define PS2E_GS_VERSION 0x0006 -#define PS2E_X86 0x01 // 32 bit -#define PS2E_X86_64 0x02 // 64 bit - -EXPORT_C_(UINT32) PS2EgetLibType() -{ - return PS2E_LT_GS; -} - -EXPORT_C_(char*) PS2EgetLibName() -{ - CString str; - - str.Format(_T("GSdx %d"), SVN_REV); - - if(SVN_MODS) str += _T("m"); - -#if _M_AMD64 - str += _T(" 64-bit"); -#endif - - CAtlList sl; - -#ifdef __INTEL_COMPILER - CString s; - s.Format(_T("Intel C++ %d.%02d"), __INTEL_COMPILER/100, __INTEL_COMPILER%100); - sl.AddTail(s); -#elif _MSC_VER - CString s; - s.Format(_T("MSVC %d.%02d"), _MSC_VER/100, _MSC_VER%100); - sl.AddTail(s); -#endif - -#if _M_SSE >= 0x402 - sl.AddTail(_T("SSE42")); -#elif _M_SSE >= 0x401 - sl.AddTail(_T("SSE41")); -#elif _M_SSE >= 0x301 - sl.AddTail(_T("SSSE3")); -#elif _M_SSE >= 0x200 - sl.AddTail(_T("SSE2")); -#elif _M_SSE >= 0x100 - sl.AddTail(_T("SSE")); -#endif - - POSITION pos = sl.GetHeadPosition(); - - while(pos) - { - if(pos == sl.GetHeadPosition()) str += _T(" ("); - str += sl.GetNext(pos); - str += pos ? 
_T(", ") : _T(")"); - } - - static char buff[256]; - strncpy(buff, CStringA(str), min(countof(buff)-1, str.GetLength())); - return buff; -} - -EXPORT_C_(UINT32) PS2EgetLibVersion2(UINT32 type) -{ - const UINT32 revision = 0; - const UINT32 build = 1; - const UINT32 minor = 10; - - return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (minor << 24); -} - -EXPORT_C_(UINT32) PS2EgetCpuPlatform() -{ -#if _M_AMD64 - return PS2E_X86_64; -#else - return PS2E_X86; -#endif -} - -////////////////// - -static HRESULT s_hr = E_FAIL; -static GSRendererBase* s_gs; -static void (*s_irq)() = NULL; -static BYTE* s_basemem = NULL; - -EXPORT_C GSsetBaseMem(BYTE* mem) -{ - s_basemem = mem - 0x12000000; -} - -EXPORT_C_(INT32) GSinit() -{ - AFX_MANAGE_STATE(AfxGetStaticModuleState()); - - return 0; -} - -EXPORT_C GSshutdown() -{ - AFX_MANAGE_STATE(AfxGetStaticModuleState()); -} - -EXPORT_C GSclose() -{ - delete s_gs; - - s_gs = NULL; - - if(SUCCEEDED(s_hr)) - { - ::CoUninitialize(); - - s_hr = E_FAIL; - } -} - -static INT32 GSopen(void* dsp, char* title, int mt, int renderer) -{ - AFX_MANAGE_STATE(AfxGetStaticModuleState()); - - // - - CString str; - - str.Format(_T("d3dx9_%d.dll"), D3DX_SDK_VERSION); - - if(HINSTANCE hDll = LoadLibrary(str)) - { - FreeLibrary(hDll); - } - else - { - int res = AfxMessageBox(_T("Please update DirectX!\n\nWould you like to open the download page in your browser?"), MB_YESNO); - - if(res == IDYES) - { - ShellExecute(NULL, _T("open"), _T("http://www.microsoft.com/downloads/details.aspx?FamilyId=2DA43D38-DB71-4C1B-BC6A-9B6652CD92A3"), NULL, NULL, SW_SHOWNORMAL); - } - - return -1; - } - - // - - if(!CheckSSE()) - { - CString str; - str.Format(_T("This CPU does not support SSE %d.%02d"), _M_SSE >> 8, _M_SSE & 0xff); - AfxMessageBox(str, MB_OK); - return -1; - } - - // - - GSclose(); - - // TODO - - int nloophack = AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2); - - GSRendererSettings rs; - - rs.m_interlace = 
AfxGetApp()->GetProfileInt(_T("Settings"), _T("interlace"), 0); - rs.m_aspectratio = AfxGetApp()->GetProfileInt(_T("Settings"), _T("aspectratio"), 1); - rs.m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1); - rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); - rs.m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE); - - switch(renderer) - { - default: - case 0: s_gs = new GSRendererHW9(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 1: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 2: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 3: s_gs = new GSRendererHW10(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 4: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 5: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 6: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 7: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; - } - - s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); - - if(!s_gs->Create(CString(title))) - { - GSclose(); - return -1; - } - - s_gs->m_wnd.Show(); - - *(HWND*)dsp = s_gs->m_wnd; - - // if(mt) _mm_setcsr(MXCSR); - - return 0; -} - -EXPORT_C_(INT32) GSopen(void* dsp, char* title, int mt) -{ - AFX_MANAGE_STATE(AfxGetStaticModuleState()); - - int renderer = AfxGetApp()->GetProfileInt(_T("Settings"), _T("renderer"), 0); - - return GSopen(dsp, title, mt, renderer); -} - -EXPORT_C GSreset() -{ - s_gs->Reset(); -} - -EXPORT_C GSgifSoftReset(int mask) -{ - s_gs->SoftReset((BYTE)mask); -} - -EXPORT_C GSwriteCSR(UINT32 csr) -{ - s_gs->WriteCSR(csr); -} - -EXPORT_C GSreadFIFO(BYTE* mem) -{ - s_gs->ReadFIFO(mem, 1); -} - -EXPORT_C GSreadFIFO2(BYTE* mem, UINT32 size) -{ - s_gs->ReadFIFO(mem, size); -} - -EXPORT_C GSgifTransfer1(BYTE* mem, UINT32 addr) -{ - s_gs->Transfer<0>(mem + addr, (0x4000 - 
addr) / 16); -} - -EXPORT_C GSgifTransfer2(BYTE* mem, UINT32 size) -{ - s_gs->Transfer<1>(mem, size); -} - -EXPORT_C GSgifTransfer3(BYTE* mem, UINT32 size) -{ - s_gs->Transfer<2>(mem, size); -} - -EXPORT_C GSvsync(int field) -{ - s_gs->VSync(field); -} - -EXPORT_C_(UINT32) GSmakeSnapshot(char* path) -{ - return s_gs->MakeSnapshot(CString(path) + _T("gsdx")); -} - -EXPORT_C GSkeyEvent(keyEvent* ev) -{ -} - -EXPORT_C_(INT32) GSfreeze(int mode, freezeData* data) -{ - if(mode == FREEZE_SAVE) - { - return s_gs->Freeze(data, false); - } - else if(mode == FREEZE_SIZE) - { - return s_gs->Freeze(data, true); - } - else if(mode == FREEZE_LOAD) - { - return s_gs->Defrost(data); - } - - return 0; -} - -EXPORT_C GSconfigure() -{ - AFX_MANAGE_STATE(AfxGetStaticModuleState()); - - GSSettingsDlg dlg; - - if(IDOK == dlg.DoModal()) - { - GSshutdown(); - GSinit(); - } -} - -EXPORT_C_(INT32) GStest() -{ - return 0; - - // TODO - - /* - AFX_MANAGE_STATE(AfxGetStaticModuleState()); - - CComPtr dev; - - return SUCCEEDED(D3D10CreateDevice(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &dev)) ? 
0 : -1; - */ -} - -EXPORT_C GSabout() -{ -} - -EXPORT_C GSirqCallback(void (*irq)()) -{ - s_irq = irq; -} - -EXPORT_C GSsetGameCRC(DWORD crc, int options) -{ - s_gs->SetGameCRC(crc, options); -} - -EXPORT_C GSgetLastTag(UINT32* tag) -{ - s_gs->GetLastTag(tag); -} - -EXPORT_C GSsetFrameSkip(int frameskip) -{ - s_gs->SetFrameSkip(frameskip); -} - -EXPORT_C GSReplay(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) -{ - int renderer = -1; - - { - char* start = lpszCmdLine; - char* end = NULL; - long n = strtol(lpszCmdLine, &end, 10); - if(end > start) {renderer = n; lpszCmdLine = end;} - } - - while(*lpszCmdLine == ' ') lpszCmdLine++; - - ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); - - CAtlArray buff; - - if(FILE* fp = fopen(lpszCmdLine, "rb")) - { - GSinit(); - - BYTE regs[0x2000]; - GSsetBaseMem(regs); - - HWND hWnd = NULL; - GSopen(&hWnd, _T(""), true, renderer); - - DWORD crc; - fread(&crc, 4, 1, fp); - GSsetGameCRC(crc, 0); - - freezeData fd; - fread(&fd.size, 4, 1, fp); - fd.data = new BYTE[fd.size]; - fread(fd.data, fd.size, 1, fp); - GSfreeze(FREEZE_LOAD, &fd); - delete [] fd.data; - - fread(regs, 0x2000, 1, fp); - - long start = ftell(fp); - - unsigned int index, size, addr; - - GSvsync(1); - - while(1) - { - switch(fgetc(fp)) - { - case EOF: - fseek(fp, start, 0); - if(!IsWindowVisible(hWnd)) return; - break; - case 0: - index = fgetc(fp); - fread(&size, 4, 1, fp); - switch(index) - { - case 0: - if(buff.GetCount() < 0x4000) buff.SetCount(0x4000); - addr = 0x4000 - size; - fread(buff.GetData() + addr, size, 1, fp); - GSgifTransfer1(buff.GetData(), addr); - break; - case 1: - if(buff.GetCount() < size) buff.SetCount(size); - fread(buff.GetData(), size, 1, fp); - GSgifTransfer2(buff.GetData(), size / 16); - break; - case 2: - if(buff.GetCount() < size) buff.SetCount(size); - fread(buff.GetData(), size, 1, fp); - GSgifTransfer3(buff.GetData(), size / 16); - break; - } - break; - case 1: - GSvsync(fgetc(fp)); - 
if(!IsWindowVisible(hWnd)) return; - break; - case 2: - fread(&size, 4, 1, fp); - if(buff.GetCount() < size) buff.SetCount(size); - GSreadFIFO2(buff.GetData(), size / 16); - break; - case 3: - fread(regs, 0x2000, 1, fp); - break; - default: - return; - } - } - - GSclose(); - - GSshutdown(); - - fclose(fp); - } -} - -EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) -{ - ::SetPriorityClass(::GetCurrentProcess(), HIGH_PRIORITY_CLASS); - - FILE* file = _tfopen(_T("c:\\log.txt"), _T("a")); - - _ftprintf(file, _T("-------------------------\n\n")); - - if(1) - { - GSLocalMemory mem; - - static struct {int psm; LPCSTR name;} s_format[] = - { - {PSM_PSMCT32, "32"}, - {PSM_PSMCT24, "24"}, - {PSM_PSMCT16, "16"}, - {PSM_PSMCT16S, "16S"}, - {PSM_PSMT8, "8"}, - {PSM_PSMT4, "4"}, - {PSM_PSMT8H, "8H"}, - {PSM_PSMT4HL, "4HL"}, - {PSM_PSMT4HH, "4HH"}, - {PSM_PSMZ32, "32Z"}, - {PSM_PSMZ24, "24Z"}, - {PSM_PSMZ16, "16Z"}, - {PSM_PSMZ16S, "16ZS"}, - }; - - BYTE* ptr = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); - - for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (BYTE)i; - - // - - for(int tbw = 5; tbw <= 10; tbw++) - { - int n = 256 << ((10 - tbw) * 2); - - int w = 1 << tbw; - int h = 1 << tbw; - - _ftprintf(file, _T("%d x %d\n\n"), w, h); - - for(int i = 0; i < countof(s_format); i++) - { - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[s_format[i].psm]; - - GSLocalMemory::writeImage wi = psm.wi; - GSLocalMemory::readImage ri = psm.ri; - GSLocalMemory::readTexture rtx = psm.rtx; - GSLocalMemory::readTexture rtxP = psm.rtxP; - - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.SBP = 0; - BITBLTBUF.SBW = w / 64; - BITBLTBUF.SPSM = s_format[i].psm; - BITBLTBUF.DBP = 0; - BITBLTBUF.DBW = w / 64; - BITBLTBUF.DPSM = s_format[i].psm; - - GIFRegTRXPOS TRXPOS; - - TRXPOS.SSAX = 0; - TRXPOS.SSAY = 0; - TRXPOS.DSAX = 0; - TRXPOS.DSAY = 0; - - GIFRegTRXREG TRXREG; - - TRXREG.RRW = w; - TRXREG.RRH = h; - - CRect r(0, 0, w, h); - - GIFRegTEX0 TEX0; - - TEX0.TBP0 
= 0; - TEX0.TBW = w / 64; - - GIFRegTEXA TEXA; - - TEXA.TA0 = 0; - TEXA.TA1 = 0x80; - TEXA.AEM = 0; - - int trlen = w * h * psm.trbpp / 8; - int len = w * h * psm.bpp / 8; - - clock_t start, end; - - _ftprintf(file, _T("[%4s] "), s_format[i].name); - - start = clock(); - - for(int j = 0; j < n; j++) - { - int x = 0; - int y = 0; - - (mem.*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); - } - - end = clock(); - - _ftprintf(file, _T("%6d %6d | "), (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - - start = clock(); - - for(int j = 0; j < n; j++) - { - int x = 0; - int y = 0; - - (mem.*ri)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); - } - - end = clock(); - - _ftprintf(file, _T("%6d %6d | "), (int)((float)trlen * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - - start = clock(); - - for(int j = 0; j < n; j++) - { - (mem.*rtx)(r, ptr, w * 4, TEX0, TEXA); - } - - end = clock(); - - _ftprintf(file, _T("%6d %6d "), (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - - if(psm.pal > 0) - { - start = clock(); - - for(int j = 0; j < n; j++) - { - (mem.*rtxP)(r, ptr, w, TEX0, TEXA); - } - - end = clock(); - - _ftprintf(file, _T("| %6d %6d "), (int)((float)len * n / (end - start) / 1000), (int)((float)(w * h) * n / (end - start) / 1000)); - } - - _ftprintf(file, _T("\n")); - - fflush(file); - } - - _ftprintf(file, _T("\n")); - } - - _aligned_free(ptr); - } - - if(0) - { - BYTE regs[0x2000]; - GSsetBaseMem(regs); - - HWND hWnd = NULL; - GSopen(&hWnd, _T(""), true, 6); - - s_gs->m_env.COLCLAMP.CLAMP = 1; - s_gs->m_env.PRIM.ABE = 0; - s_gs->m_env.PRIM.FST = 1; - s_gs->m_env.PRIM.TME = 1; - s_gs->m_env.PRIM.IIP = 0; - s_gs->m_env.TEXA.TA0 = 0; - s_gs->m_env.TEXA.TA1 = 0x80; - s_gs->m_env.TEXA.AEM = 0; - s_gs->m_context->ALPHA.A = 0; - s_gs->m_context->ALPHA.B = 1; - s_gs->m_context->ALPHA.C = 0; - s_gs->m_context->ALPHA.D = 1; - 
s_gs->m_context->CLAMP.WMS = 1; - s_gs->m_context->CLAMP.WMT = 1; - s_gs->m_context->CLAMP.MINU = 0; - s_gs->m_context->CLAMP.MINV = 0; - s_gs->m_context->CLAMP.MAXU = 511; - s_gs->m_context->CLAMP.MAXV = 511; - s_gs->m_context->FRAME.FBP = 0 >> 5; - s_gs->m_context->FRAME.FBW = 8; - s_gs->m_context->FRAME.PSM = PSM_PSMCT16S; - s_gs->m_context->SCISSOR.SCAX0 = 0; - s_gs->m_context->SCISSOR.SCAY0 = 0; - s_gs->m_context->SCISSOR.SCAX1 = 511; - s_gs->m_context->SCISSOR.SCAY1 = 511; - s_gs->m_context->TEST.ZTE = 0; - s_gs->m_context->TEST.ZTST = 2; - s_gs->m_context->TEX0.TBP0 = 0x2000; - s_gs->m_context->TEX0.TBW = 8; - s_gs->m_context->TEX0.PSM = PSM_PSMCT32; - s_gs->m_context->TEX0.TFX = 1; - s_gs->m_context->TEX0.TCC = 0; - s_gs->m_context->TEX0.TW = 9; - s_gs->m_context->TEX0.TH = 9; - s_gs->m_context->TEX1.MMAG = 0; - s_gs->m_context->TEX1.MMIN = 0; - s_gs->m_context->ZBUF.ZBP = 0x1000 >> 5; - s_gs->m_context->ZBUF.PSM = PSM_PSMZ24; - - GSRasterizer* ras = ((GSRendererSW*)s_gs)->GetRasterizer(); - - int count = 512 * 512; - - GSVertexSW* vertices = (GSVertexSW*)_aligned_malloc(count * sizeof(GSVertexSW), 16); -/* - // point - - for(int j = 0; j < 512; j++) - { - for(int i = 0; i < 512; i++) - { - GSVertexSW& v = vertices[(j << 7) + i]; - - v.p = GSVector4(i, j, 0, 0); - v.t = GSVector4((float)i + 0.5, (float)j + 0.5, 1.0f, 0.0f); - v.c = GSVector4(128.0f); - } - } - - s_gs->PRIM->PRIM = GS_POINTLIST; - - ras->Draw(vertices, count); - - vertices[0].p = GSVector4(0, 0, 0, 0); - vertices[0].t = GSVector4(0.5, 0.5, 1.0f, 0.0f); - vertices[0].c = GSVector4(128.0f); - vertices[1].p = GSVector4(512, 512, 0, 0); - vertices[1].t = GSVector4(512.5f, 512.5f, 1.0f, 0.0f); - vertices[1].c = GSVector4(128.0f); - - for(int i = 2; i < 512 * 512; i += 2) - { - memcpy(&vertices[i], &vertices[0], sizeof(vertices[0]) * 2); - } - - // sprite - - s_gs->PRIM->PRIM = GS_SPRITE; - - ras->Draw(vertices, count); - - // triangle - - vertices[0].p = GSVector4(0, 0, 0, 0); - vertices[0].t = 
GSVector4(0.5, 0.5, 1.0f, 0.0f); - vertices[0].c = GSVector4(128.0f); - vertices[1].p = GSVector4(512, 0, 0, 0); - vertices[1].t = GSVector4(512.5f, 0.5f, 1.0f, 0.0f); - vertices[1].c = GSVector4(128.0f); - vertices[2].p = GSVector4(512, 512, 0, 0); - vertices[2].t = GSVector4(512.5f, 512.5f, 1.0f, 0.0f); - vertices[2].c = GSVector4(128.0f); - - for(int i = 3; i < 512 * 512 - 2; i += 3) - { - memcpy(&vertices[i], &vertices[0], sizeof(vertices[0]) * 3); - } - - s_gs->PRIM->PRIM = GS_TRIANGLELIST; - - ras->Draw(vertices, 999); -*/ - // - - _aligned_free(vertices); - - GSclose(); - } - - // - - if(0) - { - GSLocalMemory mem; - - BYTE* ptr = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); - - for(int i = 0; i < 1024 * 1024 * 4; i++) ptr[i] = (BYTE)i; - - const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[PSM_PSMCT32]; - - GSLocalMemory::writeImage wi = psm.wi; - - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.DBP = 0; - BITBLTBUF.DBW = 32; - BITBLTBUF.DPSM = PSM_PSMCT32; - - GIFRegTRXPOS TRXPOS; - - TRXPOS.DSAX = 0; - TRXPOS.DSAY = 1; - - GIFRegTRXREG TRXREG; - - TRXREG.RRW = 256; - TRXREG.RRH = 256; - - int trlen = 256 * 256 * psm.trbpp / 8; - - int x = 0; - int y = 0; - - (mem.*wi)(x, y, ptr, trlen, BITBLTBUF, TRXPOS, TRXREG); - } - - // - - fclose(file); -} +} \ No newline at end of file diff --git a/gsdx/GSdx.def b/gsdx/GSdx.def index c19163a..fd1bf9d 100644 --- a/gsdx/GSdx.def +++ b/gsdx/GSdx.def @@ -33,4 +33,31 @@ EXPORTS GSsetFrameSkip GSgetLastTag GSReplay - GSBenchmark \ No newline at end of file + GSBenchmark + PSEgetLibType + PSEgetLibName + PSEgetLibVersion + GPUinit + GPUshutdown + GPUopen + GPUclose + GPUconfigure + GPUabout + GPUtest + GPUwriteData + GPUwriteStatus + GPUreadData + GPUreadStatus + GPUdmaChain + GPUgetMode + GPUsetMode + GPUupdateLace + GPUmakeSnapshot + GPUwriteDataMem + GPUreadDataMem + GPUdisplayText + GPUdisplayFlags + GPUfreeze + GPUshowScreenPic + GPUgetScreenPic + GPUcursor diff --git a/gsdx/GSdx_vs2005.vcproj b/gsdx/GSdx_vs2005.vcproj 
deleted file mode 100644 index 26beb40..0000000 --- a/gsdx/GSdx_vs2005.vcproj +++ /dev/null @@ -1,2711 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/gsdx/GSdx_vs2008.vcproj b/gsdx/GSdx_vs2008.vcproj index ca35833..48aed69 100644 --- a/gsdx/GSdx_vs2008.vcproj +++ b/gsdx/GSdx_vs2008.vcproj @@ -1051,6 +1051,54 @@ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" > + + + + + + + + + + + + + + + + + + + + + + @@ -1597,6 +1645,46 @@ Filter="h;hpp;hxx;hm;inl;inc;xsd" UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}" > + + + + + + + + + + + + + + + + + + + +