This commit is contained in:
gabest
2008-12-04 10:32:06 +00:00
parent ef7dc5be54
commit 61a2aa997a
25 changed files with 1128 additions and 450 deletions

View File

@@ -24,6 +24,7 @@
#include "GPURendererSW.h"
#include "GSDevice9.h"
#include "GSDevice10.h"
#include "GPUSettingsDlg.h"
#define PSE_LT_GPU 2
@@ -97,12 +98,23 @@ EXPORT_C_(INT32) GPUopen(HWND hWnd)
GPURendererSettings rs;
rs.m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1);
rs.m_dither = AfxGetApp()->GetProfileInt(_T("Settings"), _T("dither"), 1);
rs.m_aspectratio = AfxGetApp()->GetProfileInt(_T("Settings"), _T("aspectratio"), 1);
rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE);
rs.m_filter = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("filter"), 0);
rs.m_dither = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("dithering"), 1);
rs.m_aspectratio = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("AspectRatio"), 1);
rs.m_vsync = !!AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("vsync"), FALSE);
rs.m_scale.cx = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("scale_x"), 0);
rs.m_scale.cy = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("scale_y"), 0);
s_gpu = new GPURendererSW<GSDevice9>(rs);
int renderer = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("Renderer"), 1);
switch(renderer)
{
default:
// TODO: case 0: s_gpu = new GPURendererSW<GSDevice7>(rs); break;
case 1: s_gpu = new GPURendererSW<GSDevice9>(rs); break;
case 2: s_gpu = new GPURendererSW<GSDevice10>(rs); break;
// TODO: case 3: s_gpu = new GPURendererNull<GSDeviceNull>(rs); break;
}
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
@@ -118,7 +130,15 @@ EXPORT_C_(INT32) GPUopen(HWND hWnd)
// Plugin entry point that shows the GPU settings dialog.
//
// The dialog is run modally. When it is dismissed with anything other than
// IDOK nothing else happens; when it is accepted the plugin is torn down and
// re-initialised through GPUshutdown() / GPUinit() (both defined elsewhere in
// this file - presumably so that the next GPUopen() picks up the new
// settings; confirm against those functions).
//
// Always returns 0.
EXPORT_C_(INT32) GPUconfigure()
{
	// Must come first: switches MFC to this DLL's module state for the
	// lifetime of the call so the dialog resources resolve correctly.
	AFX_MANAGE_STATE(AfxGetStaticModuleState());

	GPUSettingsDlg settings;

	if(settings.DoModal() != IDOK)
	{
		return 0;
	}

	GPUshutdown();
	GPUinit();

	return 0;
}
@@ -173,8 +193,16 @@ EXPORT_C_(UINT32) GPUdmaChain(const BYTE* mem, UINT32 addr)
{
// TODO
UINT32 last[3];
memset(last, 0xff, sizeof(last));
do
{
if(addr == last[1] || addr == last[2]) break;
(addr < last[0] ? last[1] : last[2]) = addr;
last[0] = addr;
BYTE size = mem[addr + 3];
if(size > 0)

View File

@@ -58,7 +58,7 @@ void GPUDrawScanline::SetupDraw(Vertex* vertices, int count, const void* texture
m_sel.tlu = env.STATUS.TP < 2;
m_sel.twin = (env.TWIN.ai32 & 0xfffff) != 0;
m_sel.dtd = m_dither ? env.STATUS.DTD : 0;
m_sel.ltf = m_filter < 2 ? m_filter : env.PRIM.TYPE == GPU_POLYGON ? 1 : 0;
m_sel.ltf = m_filter == 1 && env.PRIM.TYPE == GPU_POLYGON || m_filter == 2 ? 1 : 0;
m_dsf = m_ds[m_sel];
@@ -84,8 +84,8 @@ void GPUDrawScanline::SetupDraw(Vertex* vertices, int count, const void* texture
u = env.TWIN.TWX << 3;
v = env.TWIN.TWY << 3;
m_slenv.u[1] = GSVector4i((u << 16) | u);
m_slenv.v[1] = GSVector4i((v << 16) | v);
m_slenv.u[1] = GSVector4i((u << 16) | u) & ~m_slenv.u[0];
m_slenv.v[1] = GSVector4i((v << 16) | v) & ~m_slenv.v[0];
}
}
@@ -95,30 +95,21 @@ void GPUDrawScanline::SetupDraw(Vertex* vertices, int count, const void* texture
void GPUDrawScanline::SetupScanline(const Vertex& dv)
{
// we could use integers here but it's more accurate to multiply a float than a 8.8 fixed point number
GSVector4 ps0123 = GSVector4::ps0123();
GSVector4 ps4567 = GSVector4::ps4567();
GSVector4 dt = dv.t;
GSVector4i dtc8 = GSVector4i(dv.t * 8.0f).ps32(GSVector4i(dv.c * 8.0f));
m_slenv.ds[0] = dt.xxxx() * ps0123;
m_slenv.dt[0] = dt.yyyy() * ps0123;
m_slenv.ds[1] = dt.xxxx() * ps4567;
m_slenv.dt[1] = dt.yyyy() * ps4567;
m_slenv.ds[2] = dt.xxxx() * 8.0f;
m_slenv.dt[2] = dt.yyyy() * 8.0f;
m_slenv.ds = GSVector4i(dv.t.xxxx() * ps0123).ps32(GSVector4i(dv.t.xxxx() * ps4567));
m_slenv.dt = GSVector4i(dv.t.yyyy() * ps0123).ps32(GSVector4i(dv.t.yyyy() * ps4567));
m_slenv.dst8 = dtc8.upl16(dtc8);
GSVector4i dc;
dc = GSVector4i(dv.c);
dc = dc.ps32();
dc = dc.upl16(dc);
GSVector4i s_01234567(0x00010000, 0x00030002, 0x00050004, 0x00070006); // TODO
m_slenv.dr = dc.xxxx().mul16l(s_01234567);
m_slenv.dg = dc.yyyy().mul16l(s_01234567);
m_slenv.db = dc.zzzz().mul16l(s_01234567);
m_slenv.dc = dc.sll16(3);
m_slenv.dr = GSVector4i(dv.c.xxxx() * ps0123).ps32(GSVector4i(dv.c.xxxx() * ps4567));
m_slenv.dg = GSVector4i(dv.c.yyyy() * ps0123).ps32(GSVector4i(dv.c.yyyy() * ps4567));
m_slenv.db = GSVector4i(dv.c.zzzz() * ps0123).ps32(GSVector4i(dv.c.zzzz() * ps4567));
m_slenv.dc8 = dtc8.uph16(dtc8);
}
void GPUDrawScanline::DrawScanline(int top, int left, int right, const Vertex& v)
@@ -138,144 +129,121 @@ IDrawScanline::DrawScanlinePtr GPUDrawScanline::GetDrawScanlinePtr()
return m_dsf;
}
void GPUDrawScanline::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4* s, const GSVector4* t, GSVector4i* c)
void GPUDrawScanline::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4i& s, const GSVector4i& t, GSVector4i* c)
{
const void* RESTRICT tex = m_slenv.tex;
const WORD* RESTRICT clut = m_slenv.clut;
if(ltf)
{
GSVector4i cc[8];
GSVector4i u = s.sub16(GSVector4i(0x00200020)); // - 0.125f
GSVector4i v = t.sub16(GSVector4i(0x00200020)); // - 0.125f
for(int j = 0; j < 2; j++)
GSVector4i u0 = u.srl16(8);
GSVector4i v0 = v.srl16(8);
GSVector4i u1 = u0.add16(GSVector4i::x0001());
GSVector4i v1 = v0.add16(GSVector4i::x0001());
GSVector4i uf = u & GSVector4i::x00ff();
GSVector4i vf = v & GSVector4i::x00ff();
if(twin)
{
GSVector4 ss = s[j] - 0.25f;
GSVector4 tt = t[j] - 0.25f;
GSVector4 uf = ss.floor();
GSVector4 vf = tt.floor();
GSVector4 uff = ss - uf;
GSVector4 vff = tt - vf;
GSVector4i u = GSVector4i(uf);
GSVector4i v = GSVector4i(vf);
GSVector4i u01 = GSVector4i(u).ps32(u + GSVector4i::x00000001());
GSVector4i v01 = GSVector4i(v).ps32(v + GSVector4i::x00000001());
if(twin)
{
u01 = (u01 & m_slenv.u[0]).add16(m_slenv.u[1]);
v01 = (v01 & m_slenv.v[0]).add16(m_slenv.v[1]);
}
GSVector4i uv01 = u01.pu16(v01);
GSVector4i addr0011 = uv01.upl8(uv01.zwxy());
GSVector4i addr0110 = uv01.upl8(uv01.wzyx());
GSVector4i c0011, c0110;
#if _M_SSE >= 0x401
if(tlu)
{
c0011 = addr0011.gather16_16((const BYTE*)tex).gather16_16(clut);
c0110 = addr0110.gather16_16((const BYTE*)tex).gather16_16(clut);
}
else
{
c0011 = addr0011.gather16_16((const WORD*)tex);
c0110 = addr0110.gather16_16((const WORD*)tex);
}
#else
int i = 0;
if(tlu)
{
do
{
c0011.u16[i] = clut[((const BYTE*)tex)[addr0011.u16[i]]];
c0110.u16[i] = clut[((const BYTE*)tex)[addr0110.u16[i]]];
}
while(++i < 8);
}
else
{
do
{
c0011.u16[i] = ((const WORD*)tex)[addr0011.u16[i]];
c0110.u16[i] = ((const WORD*)tex)[addr0110.u16[i]];
}
while(++i < 8);
}
#endif
GSVector4i r0011 = GSVector4i(c0011 & 0x001f001f) << 3;
GSVector4i r0110 = GSVector4i(c0110 & 0x001f001f) << 3;
GSVector4 r00 = GSVector4(r0011.upl16());
GSVector4 r01 = GSVector4(r0110.upl16());
GSVector4 r10 = GSVector4(r0110.uph16());
GSVector4 r11 = GSVector4(r0011.uph16());
r00 = r00.lerp(r01, vff);
r10 = r10.lerp(r11, vff);
r00 = r00.lerp(r10, uff);
cc[j * 4 + 0] = GSVector4i(r00);
GSVector4i g0011 = GSVector4i(c0011 & 0x03e003e0) >> 2;
GSVector4i g0110 = GSVector4i(c0110 & 0x03e003e0) >> 2;
GSVector4 g00 = GSVector4(g0011.upl16());
GSVector4 g01 = GSVector4(g0110.upl16());
GSVector4 g10 = GSVector4(g0110.uph16());
GSVector4 g11 = GSVector4(g0011.uph16());
g00 = g00.lerp(g01, vff);
g10 = g10.lerp(g11, vff);
g00 = g00.lerp(g10, uff);
cc[j * 4 + 1] = GSVector4i(g00);
GSVector4i b0011 = GSVector4i(c0011 & 0x7c007c00) >> 7;
GSVector4i b0110 = GSVector4i(c0110 & 0x7c007c00) >> 7;
GSVector4 b00 = GSVector4(b0011.upl16());
GSVector4 b01 = GSVector4(b0110.upl16());
GSVector4 b10 = GSVector4(b0110.uph16());
GSVector4 b11 = GSVector4(b0011.uph16());
b00 = b00.lerp(b01, vff);
b10 = b10.lerp(b11, vff);
b00 = b00.lerp(b10, uff);
cc[j * 4 + 2] = GSVector4i(b00);
GSVector4i a0011 = GSVector4i(c0011 & 0x80008000);
GSVector4i a0110 = GSVector4i(c0110 & 0x80008000);
GSVector4 a00 = GSVector4(a0011.upl16());
GSVector4 a01 = GSVector4(a0110.upl16());
GSVector4 a10 = GSVector4(a0110.uph16());
GSVector4 a11 = GSVector4(a0011.uph16());
a00 = a00.lerp(a01, vff);
a10 = a10.lerp(a11, vff);
a00 = a00.lerp(a10, uff);
cc[j * 4 + 3] = GSVector4i(a00);
u0 = (u0 & m_slenv.u[0]).add16(m_slenv.u[1]);
v0 = (v0 & m_slenv.v[0]).add16(m_slenv.v[1]);
u1 = (u1 & m_slenv.u[0]).add16(m_slenv.u[1]);
v1 = (v1 & m_slenv.v[0]).add16(m_slenv.v[1]);
}
c[0] = cc[0].ps32(cc[4]);
c[1] = cc[1].ps32(cc[5]);
c[2] = cc[2].ps32(cc[6]);
c[3] = cc[3].ps32(cc[7]).gt16(GSVector4i::zero());
GSVector4i addr00 = v0.sll16(8) | u0;
GSVector4i addr01 = v0.sll16(8) | u1;
GSVector4i addr10 = v1.sll16(8) | u0;
GSVector4i addr11 = v1.sll16(8) | u1;
// Fetch the four texels surrounding each of the 8 sample positions.
// cXY is read from addrXY, where addr00/addr01/addr10/addr11 are the 16-bit
// (v << 8) | u offsets built just above from (v0,u0), (v0,u1), (v1,u0) and
// (v1,u1) respectively.
//
// When tlu is set, tex is read as 8-bit indices that are then looked up in
// clut; otherwise tex is read directly as 16-bit texels.
GSVector4i c00, c01, c10, c11;
#if _M_SSE >= 0x401
if(tlu)
{
	c00 = addr00.gather16_16((const BYTE*)tex).gather16_16(clut);
	c01 = addr01.gather16_16((const BYTE*)tex).gather16_16(clut);
	c10 = addr10.gather16_16((const BYTE*)tex).gather16_16(clut);
	c11 = addr11.gather16_16((const BYTE*)tex).gather16_16(clut);
}
else
{
	c00 = addr00.gather16_16((const WORD*)tex);
	c01 = addr01.gather16_16((const WORD*)tex);
	// Fixed: these two previously gathered from addr00/addr01 again, which
	// duplicated the v0 row and disabled vertical filtering on this path.
	// Now consistent with the tlu branch and with the scalar fallback below.
	c10 = addr10.gather16_16((const WORD*)tex);
	c11 = addr11.gather16_16((const WORD*)tex);
}
#else
// Scalar fallback: same lookups, one 16-bit lane at a time.
int i = 0;
if(tlu)
{
	do
	{
		c00.u16[i] = clut[((const BYTE*)tex)[addr00.u16[i]]];
		c01.u16[i] = clut[((const BYTE*)tex)[addr01.u16[i]]];
		c10.u16[i] = clut[((const BYTE*)tex)[addr10.u16[i]]];
		c11.u16[i] = clut[((const BYTE*)tex)[addr11.u16[i]]];
	}
	while(++i < 8);
}
else
{
	do
	{
		c00.u16[i] = ((const WORD*)tex)[addr00.u16[i]];
		c01.u16[i] = ((const WORD*)tex)[addr01.u16[i]];
		c10.u16[i] = ((const WORD*)tex)[addr10.u16[i]];
		c11.u16[i] = ((const WORD*)tex)[addr11.u16[i]];
	}
	while(++i < 8);
}
#endif
GSVector4i r00 = (c00 & 0x001f001f) << 2;
GSVector4i r01 = (c01 & 0x001f001f) << 2;
GSVector4i r10 = (c10 & 0x001f001f) << 2;
GSVector4i r11 = (c11 & 0x001f001f) << 2;
r00 = r00.add16(r01.sub16(r00).mul16l(uf).sra16(8));
r10 = r10.add16(r11.sub16(r10).mul16l(uf).sra16(8));
c[0] = r00.add16(r10.sub16(r00).mul16l(vf).sra16(8)) << 1;
GSVector4i g00 = (c00 & 0x03e003e0) >> 3;
GSVector4i g01 = (c01 & 0x03e003e0) >> 3;
GSVector4i g10 = (c10 & 0x03e003e0) >> 3;
GSVector4i g11 = (c11 & 0x03e003e0) >> 3;
g00 = g00.add16(g01.sub16(g00).mul16l(uf).sra16(8));
g10 = g10.add16(g11.sub16(g10).mul16l(uf).sra16(8));
c[1] = g00.add16(g10.sub16(g00).mul16l(vf).sra16(8)) << 1;
GSVector4i b00 = (c00 & 0x7c007c00) >> 8;
GSVector4i b01 = (c01 & 0x7c007c00) >> 8;
GSVector4i b10 = (c10 & 0x7c007c00) >> 8;
GSVector4i b11 = (c11 & 0x7c007c00) >> 8;
b00 = b00.add16(b01.sub16(b00).mul16l(uf).sra16(8));
b10 = b10.add16(b11.sub16(b10).mul16l(uf).sra16(8));
c[2] = b00.add16(b10.sub16(b00).mul16l(vf).sra16(8)) << 1;
GSVector4i a00 = (c00 & 0x80008000) >> 9;
GSVector4i a01 = (c01 & 0x80008000) >> 9;
GSVector4i a10 = (c10 & 0x80008000) >> 9;
GSVector4i a11 = (c11 & 0x80008000) >> 9;
a00 = a00.add16(a01.sub16(a00).mul16l(uf).sra16(8));
a10 = a10.add16(a11.sub16(a10).mul16l(uf).sra16(8));
c[3] = a00.add16(a10.sub16(a00).mul16l(vf).sra16(8)).gt16(GSVector4i::zero());
// mask out blank pixels (not perfect)
@@ -287,10 +255,8 @@ void GPUDrawScanline::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin
}
else
{
GSVector4i u, v;
u = GSVector4i(s[0]).ps32(GSVector4i(s[1]));
v = GSVector4i(t[0]).ps32(GSVector4i(t[1]));
GSVector4i u = s.srl16(8);
GSVector4i v = t.srl16(8);
if(twin)
{
@@ -298,9 +264,7 @@ void GPUDrawScanline::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin
v = (v & m_slenv.v[0]).add16(m_slenv.v[1]);
}
GSVector4i uv = u.pu16(v);
GSVector4i addr = uv.upl8(uv.zwxy());
GSVector4i addr = v.sll16(8) | u;
GSVector4i c00;
@@ -378,6 +342,7 @@ void GPUDrawScanline::ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i&
__assume(0);
}
}
void GPUDrawScanline::AlphaBlend(UINT32 abr, UINT32 tme, const GSVector4i& d, GSVector4i* c)
{
GSVector4i r = (d & 0x001f001f) << 3;
@@ -455,6 +420,8 @@ void GPUDrawScanline::Init()
m_ds[i] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineT;
}
#ifdef FAST_DRAWSCANLINE
m_ds[0x00] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineExT<0x00>;
m_ds[0x01] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineExT<0x01>;
m_ds[0x02] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineExT<0x02>;
@@ -711,6 +678,8 @@ void GPUDrawScanline::Init()
m_ds[0xfd] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineExT<0xfd>;
m_ds[0xfe] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineExT<0xfe>;
m_ds[0xff] = (DrawScanlinePtr)&GPUDrawScanline::DrawScanlineExT<0xff>;
#endif
}
__declspec(align(16)) static WORD s_dither[4][16] =
@@ -723,28 +692,19 @@ __declspec(align(16)) static WORD s_dither[4][16] =
void GPUDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex& v)
{
GSVector4 s[2], t[2];
GSVector4 vt = v.t;
s[0] = vt.xxxx(); s[1] = s[0];
t[0] = vt.yyyy(); t[1] = t[0];
GSVector4i s, t;
GSVector4i r, g, b;
if(m_sel.tme)
{
s[0] += m_slenv.ds[0];
t[0] += m_slenv.dt[0];
s[1] += m_slenv.ds[1];
t[1] += m_slenv.dt[1];
GSVector4i vt = GSVector4i(v.t).xxzzl();
s = vt.xxxx().add16(m_slenv.ds);
t = vt.yyyy().add16(m_slenv.dt);
}
GSVector4i vc = GSVector4i(v.c);
GSVector4i vc = GSVector4i(v.c).xxzzl().xxzzh();
vc = vc.ps32(vc);
vc = vc.upl16(vc);
GSVector4i r, g, b;
r = vc.xxxx();
g = vc.yyyy();
b = vc.zzzz();
@@ -771,7 +731,7 @@ void GPUDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex&
{
do
{
int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(8)));
int pixels = GSVector4i::min_i16(steps, 8);
GSVector4i test = GSVector4i::zero();
@@ -825,22 +785,19 @@ void GPUDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex&
if(m_sel.tme)
{
GSVector4 ds = m_slenv.ds[2];
GSVector4 dt = m_slenv.dt[2];
GSVector4i dst8 = m_slenv.dst8;
s[0] += ds;
t[0] += dt;
s[1] += ds;
t[1] += dt;
s = s.add16(dst8.xxxx());
t = t.add16(dst8.yyyy());
}
if(m_sel.iip)
{
GSVector4i dc = m_slenv.dc;
GSVector4i dc8 = m_slenv.dc8;
r = r.add16(dc.xxxx());
g = g.add16(dc.yyyy());
b = b.add16(dc.zzzz());
r = r.add16(dc8.xxxx());
g = g.add16(dc8.yyyy());
b = b.add16(dc8.zzzz());
}
}
}
@@ -858,33 +815,24 @@ void GPUDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex
DWORD rfb = (sel >> 1) & 3;
DWORD tfx = (sel >> 5) & 3;
GSVector4 s[2], t[2];
GSVector4 vt = v.t;
GSVector4i s, t;
GSVector4i r, g, b;
s[0] = vt.xxxx(); s[1] = s[0];
t[0] = vt.yyyy(); t[1] = t[0];
if(tme)
{
s[0] += m_slenv.ds[0];
t[0] += m_slenv.dt[0];
s[1] += m_slenv.ds[1];
t[1] += m_slenv.dt[1];
GSVector4i vt = GSVector4i(v.t).xxzzl();
s = vt.xxxx().add16(m_slenv.ds);
t = vt.yyyy().add16(m_slenv.dt);
}
GSVector4i vc = GSVector4i(v.c);
GSVector4i vc = GSVector4i(v.c).xxzzl().xxzzh();
vc = vc.ps32(vc);
vc = vc.upl16(vc);
GSVector4i r, g, b;
r = vc.xxxx();
g = vc.yyyy();
b = vc.zzzz();
if(m_sel.iip)
if(iip)
{
r = r.add16(m_slenv.dr);
g = g.add16(m_slenv.dg);
@@ -906,7 +854,7 @@ void GPUDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex
{
do
{
int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(8)));
int pixels = GSVector4i::min_i16(steps, 8);
GSVector4i test = GSVector4i::zero();
@@ -960,22 +908,19 @@ void GPUDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex
if(tme)
{
GSVector4 ds = m_slenv.ds[2];
GSVector4 dt = m_slenv.dt[2];
GSVector4i dst8 = m_slenv.dst8;
s[0] += ds;
t[0] += dt;
s[1] += ds;
t[1] += dt;
s = s.add16(dst8.xxxx());
t = t.add16(dst8.yyyy());
}
if(iip)
{
GSVector4i dc = m_slenv.dc;
GSVector4i dc8 = m_slenv.dc8;
r = r.add16(dc.xxxx());
g = g.add16(dc.yyyy());
b = b.add16(dc.zzzz());
r = r.add16(dc8.xxxx());
g = g.add16(dc8.yyyy());
b = b.add16(dc8.zzzz());
}
}
}

View File

@@ -70,8 +70,8 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
GSVector4i a;
GSVector4i md; // similar to gs fba
GSVector4 ds[3], dt[3];
GSVector4i dr, dg, db, dc;
GSVector4i ds, dt, dst8;
GSVector4i dr, dg, db, dc8;
};
ScanlineSelector m_sel;
@@ -86,7 +86,7 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
template<DWORD sel>
void DrawScanlineExT(int top, int left, int right, const Vertex& v);
__forceinline void SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4* s, const GSVector4* t, GSVector4i* c);
__forceinline void SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin, GSVector4i& test, const GSVector4i& s, const GSVector4i& t, GSVector4i* c);
__forceinline void ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i& g, const GSVector4i& b, GSVector4i* c);
__forceinline void AlphaBlend(UINT32 abr, UINT32 tme, const GSVector4i& d, GSVector4i* c);
__forceinline void WriteFrame(WORD* RESTRICT fb, const GSVector4i& test, const GSVector4i* c, int pixels);

View File

@@ -27,30 +27,27 @@ const GSVector4i GPULocalMemory::m_xxbx(0x00007c00);
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0);
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f);
static void CheckRect(const CRect& r)
GPULocalMemory::GPULocalMemory(const CSize& scale)
{
ASSERT(r.left >= 0 && r.left <= 1024);
ASSERT(r.right >= 0 && r.right <= 1024);
ASSERT(r.top >= 0 && r.top <= 512);
ASSERT(r.bottom >= 0 && r.bottom <= 512);
ASSERT(r.left <= r.right);
ASSERT(r.top <= r.bottom);
}
GPULocalMemory::GPULocalMemory()
{
m_vm = (WORD*)VirtualAlloc(NULL, m_size * 2, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
memset(m_vm, 0, m_size * 2);
m_scale.cx = min(max(scale.cx, 0), 2);
m_scale.cy = min(max(scale.cy, 0), 2);
//
m_clut.buff = m_vm + m_size;
int size = (1 << (12 + 11)) * sizeof(WORD);
m_vm = (WORD*)VirtualAlloc(NULL, size * 2, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
memset(m_vm, 0, size);
//
m_clut.buff = m_vm + size;
m_clut.dirty = true;
//
int size = 256 * 256 * (1 + 1 + 4) * 32;
size = 256 * 256 * (1 + 1 + 4) * 32;
m_texture.buff[0] = (BYTE*)VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
m_texture.buff[1] = m_texture.buff[0] + 256 * 256 * 32;
@@ -89,26 +86,54 @@ const WORD* GPULocalMemory::GetCLUT(int tp, int cx, int cy)
{
if(m_clut.dirty || m_clut.tp != tp || m_clut.cx != cx || m_clut.cy != cy)
{
WORD* src = GetPixelAddress(cx << (4 + 1), cy << 1);
WORD* src = GetPixelAddressScaled(cx << 4, cy);
WORD* dst = m_clut.buff;
// TODO: at normal horizontal resolution just return src
if(tp == 0)
if(m_scale.cx == 0)
{
for(int i = 0; i < 16; i++)
memcpy(dst, src, (tp == 0 ? 16 : 256) * 2);
}
else if(m_scale.cx == 1)
{
if(tp == 0)
{
dst[i] = src[i * 2];
for(int i = 0; i < 16; i++)
{
dst[i] = src[i * 2];
}
}
else if(tp == 1)
{
for(int i = 0; i < 256; i++)
{
dst[i] = src[i * 2];
}
}
}
else if(tp == 1)
else if(m_scale.cx == 2)
{
for(int i = 0; i < 256; i++)
if(tp == 0)
{
dst[i] = src[i * 2];
for(int i = 0; i < 16; i++)
{
dst[i] = src[i * 4];
}
}
else if(tp == 1)
{
for(int i = 0; i < 256; i++)
{
dst[i] = src[i * 4];
}
}
}
else
{
ASSERT(0);
}
m_clut.tp = tp;
m_clut.cx = cx;
m_clut.cy = cy;
@@ -203,16 +228,16 @@ void GPULocalMemory::Invalidate(const CRect& r)
void GPULocalMemory::FillRect(const CRect& r, WORD c)
{
CheckRect(r);
Invalidate(r);
WORD* RESTRICT dst = GetPixelAddress(r.left << 1, r.top << 1);
WORD* RESTRICT dst = GetPixelAddressScaled(r.left, r.top);
int w = r.Width() << 1;
int h = r.Height() << 1;
int w = r.Width() << m_scale.cx;
int h = r.Height() << m_scale.cy;
for(int j = 0; j < h; j++, dst += m_width)
int pitch = GetWidth();
for(int j = 0; j < h; j++, dst += pitch)
{
for(int i = 0; i < w; i++)
{
@@ -223,122 +248,268 @@ void GPULocalMemory::FillRect(const CRect& r, WORD c)
void GPULocalMemory::WriteRect(const CRect& r, const WORD* RESTRICT src)
{
CheckRect(r);
Invalidate(r);
WORD* RESTRICT dst = GetPixelAddress(r.left << 1, r.top << 1);
WORD* RESTRICT dst = GetPixelAddressScaled(r.left, r.top);
int w = r.Width();
int h = r.Height();
for(int j = 0; j < h; j++, src += w, dst += m_width << 1)
int pitch = GetWidth();
if(m_scale.cx == 0)
{
for(int i = 0; i < w; i++)
for(int j = 0; j < h; j++, src += w)
{
dst[i * 2] = src[i];
dst[i * 2 + 1] = src[i];
dst[i * 2 + m_width] = src[i];
dst[i * 2 + m_width + 1] = src[i];
for(int k = 1 << m_scale.cy; k >= 1; k--, dst += pitch)
{
memcpy(dst, src, w * 2);
}
}
}
else if(m_scale.cx == 1)
{
for(int j = 0; j < h; j++, src += w)
{
for(int k = 1 << m_scale.cy; k >= 1; k--, dst += pitch)
{
for(int i = 0; i < w; i++)
{
dst[i * 2 + 0] = src[i];
dst[i * 2 + 1] = src[i];
}
}
}
}
else if(m_scale.cx == 2)
{
for(int j = 0; j < h; j++, src += w)
{
for(int k = 1 << m_scale.cy; k >= 1; k--, dst += pitch)
{
for(int i = 0; i < w; i++)
{
dst[i * 4 + 0] = src[i];
dst[i * 4 + 1] = src[i];
dst[i * 4 + 2] = src[i];
dst[i * 4 + 3] = src[i];
}
}
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::ReadRect(const CRect& r, WORD* RESTRICT dst)
{
CheckRect(r);
WORD* RESTRICT src = GetPixelAddress(r.left << 1, r.top << 1);
WORD* RESTRICT src = GetPixelAddressScaled(r.left, r.top);
int w = r.Width();
int h = r.Height();
for(int j = 0; j < h; j++, src += m_width << 1, dst += w)
int pitch = GetWidth() << m_scale.cy;
if(m_scale.cx == 0)
{
for(int i = 0; i < w; i++)
for(int j = 0; j < h; j++, src += pitch, dst += w)
{
dst[i] = src[i * 2];
memcpy(dst, src, w * 2);
}
}
else if(m_scale.cx == 1)
{
for(int j = 0; j < h; j++, src += pitch, dst += w)
{
for(int i = 0; i < w; i++)
{
dst[i] = src[i * 2];
}
}
}
else if(m_scale.cx == 2)
{
for(int j = 0; j < h; j++, src += pitch, dst += w)
{
for(int i = 0; i < w; i++)
{
dst[i] = src[i * 4];
}
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::MoveRect(const CPoint& src, const CPoint& dst, int w, int h)
{
CheckRect(CRect(src, CSize(w, h)));
CheckRect(CRect(dst, CSize(w, h)));
Invalidate(CRect(dst, CSize(w, h)));
WORD* s = GetPixelAddress(src.x << 1, src.y << 1);
WORD* d = GetPixelAddress(dst.x << 1, dst.y << 1);
WORD* s = GetPixelAddressScaled(src.x, src.y);
WORD* d = GetPixelAddressScaled(dst.x, dst.y);
w <<= 1;
h <<= 1;
w <<= m_scale.cx;
h <<= m_scale.cy;
for(int i = 0; i < h; i++, s += m_width, d += m_width)
int pitch = GetWidth();
for(int i = 0; i < h; i++, s += pitch, d += pitch)
{
memcpy(d, s, w * sizeof(WORD));
}
}
void GPULocalMemory::ReadPage4(int tx, int ty, BYTE* dst)
void GPULocalMemory::ReadPage4(int tx, int ty, BYTE* RESTRICT dst)
{
GSVector4i mask(0x0f0f0f0f);
WORD* src = GetPixelAddress(tx << (6 + 1), ty << (8 + 1));
WORD* src = GetPixelAddressScaled(tx << 6, ty << 8);
for(int j = 0; j < 256; j++, src += m_width << 1, dst += 256)
int pitch = GetWidth() << m_scale.cy;
if(m_scale.cx == 0)
{
for(int i = 0; i < 64; i++)
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
dst[i * 4 + 0] = (src[i * 2] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i * 2] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i * 2] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i * 2] >> 12) & 0xf;
for(int i = 0; i < 64; i++)
{
dst[i * 4 + 0] = (src[i] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i] >> 12) & 0xf;
}
}
}
else if(m_scale.cx == 1)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 64; i++)
{
dst[i * 4 + 0] = (src[i * 2] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i * 2] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i * 2] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i * 2] >> 12) & 0xf;
}
}
}
else if(m_scale.cx == 2)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 64; i++)
{
dst[i * 4 + 0] = (src[i * 4] >> 0) & 0xf;
dst[i * 4 + 1] = (src[i * 4] >> 4) & 0xf;
dst[i * 4 + 2] = (src[i * 4] >> 8) & 0xf;
dst[i * 4 + 3] = (src[i * 4] >> 12) & 0xf;
}
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::ReadPage8(int tx, int ty, BYTE* dst)
void GPULocalMemory::ReadPage8(int tx, int ty, BYTE* RESTRICT dst)
{
WORD* src = GetPixelAddress(tx << (6 + 1), ty << (8 + 1));
WORD* src = GetPixelAddressScaled(tx << 6, ty << 8);
for(int j = 0; j < 256; j++, src += m_width << 1, dst += 256)
int pitch = GetWidth() << m_scale.cy;
if(m_scale.cx == 0)
{
for(int i = 0; i < 128; i++)
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
((WORD*)dst)[i] = src[i * 2];
memcpy(dst, src, 256);
}
}
else if(m_scale.cx == 1)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 128; i++)
{
((WORD*)dst)[i] = src[i * 2];
}
}
}
else if(m_scale.cx == 2)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 128; i++)
{
((WORD*)dst)[i] = src[i * 4];
}
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::ReadPage16(int tx, int ty, WORD* dst)
void GPULocalMemory::ReadPage16(int tx, int ty, WORD* RESTRICT dst)
{
WORD* src = GetPixelAddress(tx << (6 + 1), ty << (8 + 1));
WORD* src = GetPixelAddressScaled(tx << 6, ty << 8);
for(int j = 0; j < 256; j++, src += m_width << 1, dst += 256)
int pitch = GetWidth() << m_scale.cy;
if(m_scale.cx == 0)
{
for(int i = 0; i < 256; i++)
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
dst[i] = src[i * 2];
memcpy(dst, src, 512);
}
}
else if(m_scale.cx == 1)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 256; i++)
{
dst[i] = src[i * 2];
}
}
}
else if(m_scale.cx == 2)
{
for(int j = 0; j < 256; j++, src += pitch, dst += 256)
{
for(int i = 0; i < 256; i++)
{
dst[i] = src[i * 4];
}
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::ReadFrame32(const CRect& r, DWORD* dst, bool rgb24)
void GPULocalMemory::ReadFrame32(const CRect& r, DWORD* RESTRICT dst, bool rgb24)
{
WORD* src = GetPixelAddress(r.left, r.top);
int pitch = GetWidth();
if(rgb24)
{
for(int i = r.top; i < r.bottom; i++, src += m_width, dst += m_width)
for(int i = r.top; i < r.bottom; i++, src += pitch, dst += pitch)
{
Expand24(src, dst, r.Width());
}
}
else
{
for(int i = r.top; i < r.bottom; i++, src += m_width, dst += m_width)
for(int i = r.top; i < r.bottom; i++, src += pitch, dst += pitch)
{
Expand16(src, dst, r.Width());
}
@@ -369,23 +540,44 @@ void GPULocalMemory::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int
void GPULocalMemory::Expand24(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels)
{
// TODO: sse
BYTE* s = (BYTE*)src;
for(int i = 0; i < pixels; i += 4, s += 12)
if(m_scale.cx == 0)
{
dst[i + 0] = dst[i + 1] = (s[4] << 16) | (s[1] << 8) | s[0];
dst[i + 2] = dst[i + 3] = (s[9] << 16) | (s[8] << 8) | s[5];
for(int i = 0; i < pixels; i += 2, s += 6)
{
dst[i + 0] = (s[2] << 16) | (s[1] << 8) | s[0];
dst[i + 1] = (s[5] << 16) | (s[4] << 8) | s[3];
}
}
else if(m_scale.cx == 1)
{
for(int i = 0; i < pixels; i += 4, s += 12)
{
dst[i + 0] = dst[i + 1] = (s[4] << 16) | (s[1] << 8) | s[0];
dst[i + 2] = dst[i + 3] = (s[9] << 16) | (s[8] << 8) | s[5];
}
}
else if(m_scale.cx == 2)
{
for(int i = 0; i < pixels; i += 8, s += 24)
{
dst[i + 0] = dst[i + 1] = dst[i + 2] = dst[i + 3] = (s[8] << 16) | (s[1] << 8) | s[0];
dst[i + 4] = dst[i + 5] = dst[i + 6] = dst[i + 7] = (s[17] << 16) | (s[16] << 8) | s[9];
}
}
else
{
ASSERT(0);
}
}
void GPULocalMemory::SaveBMP(LPCTSTR path, CRect r, int tp, int cx, int cy)
{
r.left <<= 1;
r.top <<= 1;
r.right <<= 1;
r.bottom <<= 1;
r.left <<= m_scale.cx;
r.top <<= m_scale.cy;
r.right <<= m_scale.cx;
r.bottom <<= m_scale.cy;
r.left &= ~1;
r.right &= ~1;
@@ -412,12 +604,14 @@ void GPULocalMemory::SaveBMP(LPCTSTR path, CRect r, int tp, int cx, int cy)
fwrite(&bfh, 1, sizeof(bfh), fp);
fwrite(&bih, 1, sizeof(bih), fp);
WORD* buff = (WORD*)_aligned_malloc(sizeof(WORD) * m_width, 16);
DWORD* buff32 = (DWORD*)_aligned_malloc(sizeof(DWORD) * m_width, 16);
int pitch = GetWidth();
WORD* buff = (WORD*)_aligned_malloc(pitch * sizeof(WORD), 16);
DWORD* buff32 = (DWORD*)_aligned_malloc(pitch * sizeof(DWORD), 16);
WORD* src = GetPixelAddress(r.left, r.bottom - 1);
const WORD* clut = GetCLUT(tp, cx, cy);
for(int j = r.bottom - 1; j >= r.top; j--, src -= m_width)
for(int j = r.bottom - 1; j >= r.top; j--, src -= pitch)
{
switch(tp)
{

View File

@@ -47,16 +47,18 @@ class GPULocalMemory
WORD valid[3][2];
} m_texture;
public:
static const int m_width = (1024 << 1);
static const int m_height = (512 << 1);
static const int m_size = m_width * m_height * 2;
CSize m_scale;
public:
GPULocalMemory();
GPULocalMemory(const CSize& scale);
virtual ~GPULocalMemory();
WORD* GetPixelAddress(int x, int y) const {return &m_vm[(y << (10 + 1)) + x];}
CSize GetScale() {return m_scale;}
int GetWidth() {return 1 << (10 + m_scale.cx);}
int GetHeight() {return 1 << (9 + m_scale.cy);}
WORD* GetPixelAddress(int x, int y) const {return &m_vm[(y << (10 + m_scale.cx)) + x];}
WORD* GetPixelAddressScaled(int x, int y) const {return &m_vm[((y << m_scale.cy) << (10 + m_scale.cx)) + (x << m_scale.cx)];}
const WORD* GetCLUT(int tp, int cx, int cy);
const void* GetTexture(int tp, int tx, int ty);
@@ -68,14 +70,14 @@ public:
void ReadRect(const CRect& r, WORD* RESTRICT dst);
void MoveRect(const CPoint& src, const CPoint& dst, int w, int h);
void ReadPage4(int tx, int ty, BYTE* dst);
void ReadPage8(int tx, int ty, BYTE* dst);
void ReadPage16(int tx, int ty, WORD* dst);
void ReadPage4(int tx, int ty, BYTE* RESTRICT dst);
void ReadPage8(int tx, int ty, BYTE* RESTRICT dst);
void ReadPage16(int tx, int ty, WORD* RESTRICT dst);
void ReadFrame32(const CRect& r, DWORD* dst, bool rgb24);
void ReadFrame32(const CRect& r, DWORD* RESTRICT dst, bool rgb24);
static void Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels);
static void Expand24(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels);
void Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels);
void Expand24(const WORD* RESTRICT src, DWORD* RESTRICT dst, int pixels);
void SaveBMP(LPCTSTR path, CRect r, int tp, int cx, int cy);
};

View File

@@ -30,6 +30,7 @@ struct GPURendererSettings
int m_dither;
int m_aspectratio;
bool m_vsync;
CSize m_scale;
};
class GPURendererBase : public GPUState, protected GPURendererSettings
@@ -79,13 +80,15 @@ protected:
public:
GPURendererBase(const GPURendererSettings& rs)
: m_hWnd(NULL)
: GPUState(rs.m_scale)
, m_hWnd(NULL)
, m_wndproc(NULL)
{
m_filter = rs.m_filter;
m_dither = rs.m_dither;
m_aspectratio = rs.m_aspectratio;
m_vsync = rs.m_vsync;
m_scale = m_mem.GetScale();
}
virtual ~GPURendererBase()
@@ -208,13 +211,8 @@ protected:
{
if(m_count > 0)
{
// Dump(_T("db"));
Draw();
m_count = 0;
/*
Dump(_T("dc"), false);
/*
Dump(_T("db"));
if(m_env.PRIM.TME)
{
@@ -229,7 +227,13 @@ protected:
str.Format(_T("da_%d_%d_%d_%d_%d"), m_env.STATUS.TP, r);
Dump(str, m_env.STATUS.TP, r, false);
}
*/
*/
Draw();
m_count = 0;
//Dump(_T("dc"), false);
}
}
@@ -346,10 +350,13 @@ public:
double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame);
CRect r = m_env.GetDisplayRect();
int w = r.Width() << m_scale.cx;
int h = r.Height() << m_scale.cy;
s_stats.Format(
_T("%I64d | %d x %d | %.2f fps (%d%%) | %d/%d | %d%% CPU | %.2f | %.2f"),
m_perfmon.GetFrame(), r.Width(), r.Height(), fps, (int)(100.0 * fps / m_env.GetFPS()),
m_perfmon.GetFrame(), w, h, fps, (int)(100.0 * fps / m_env.GetFPS()),
(int)m_perfmon.Get(GSPerfMon::Prim),
(int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(),

View File

@@ -25,13 +25,15 @@
#include "GPUDrawScanline.h"
template <class Device>
class GPURendererSW : public GPURenderer<Device, GSVertexSW>
class GPURendererSW : public GPURenderer<Device, GSVertexSW>, public IDrawAsync
{
typedef GSVertexSW Vertex;
protected:
long* m_sync;
long m_threads;
GSRasterizer* m_rst;
CAtlList<GSRasterizerMT*> m_rmt;
Texture m_texture;
void ResetDevice()
@@ -43,10 +45,17 @@ protected:
{
CRect r = m_env.GetDisplayRect();
r.left <<= 1;
r.top <<= 1;
r.right <<= 1;
r.bottom <<= 1;
r.left <<= m_scale.cx;
r.top <<= m_scale.cy;
r.right <<= m_scale.cx;
r.bottom <<= m_scale.cy;
// TODO
static DWORD* buff = (DWORD*)_aligned_malloc(m_mem.GetWidth() * m_mem.GetHeight() * sizeof(DWORD), 16);
m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24);
r.OffsetRect(-r.TopLeft());
if(m_texture.GetWidth() != r.Width() || m_texture.GetHeight() != r.Height())
{
@@ -58,14 +67,7 @@ protected:
return false;
}
// TODO
static DWORD* buff = (DWORD*)_aligned_malloc(GPULocalMemory::m_size * 2, 16);
m_mem.ReadFrame32(r, buff, !!m_env.STATUS.ISRGB24);
r.OffsetRect(-r.TopLeft());
m_texture.Update(r, buff, GPULocalMemory::m_width * 4);
m_texture.Update(r, buff, m_mem.GetWidth() * sizeof(DWORD));
t = m_texture;
@@ -78,16 +80,16 @@ protected:
// TODO: x/y + off.x/y should wrap around at +/-1024
int x = m_v.XY.X + m_env.DROFF.X;
int y = m_v.XY.Y + m_env.DROFF.Y;
int x = (int)(m_v.XY.X + m_env.DROFF.X) << m_scale.cx;
int y = (int)(m_v.XY.Y + m_env.DROFF.Y) << m_scale.cy;
int s = m_v.UV.X;
int t = m_v.UV.Y;
GSVector4 pt(x << 1, y << 1, s, t);
GSVector4 pt(x, y, s, t);
v.p = pt.xyxy(GSVector4::zero());
v.t = pt.zwzw(GSVector4::zero()) + GSVector4(0.25f);
v.t = (pt.zwzw(GSVector4::zero()) + GSVector4(0.125f)) * 256.0f;
v.c = GSVector4((DWORD)m_v.RGB.ai32) * 128.0f;
__super::VertexKick();
@@ -108,6 +110,18 @@ protected:
// TODO
}
// Builds the clipping rectangle used by the rasterizers: the drawing area
// (DRAREATL .. DRAREABR inclusive) scaled by the internal resolution shifts,
// with the right/bottom edges limited to the size of local memory.
GSVector4i GetScissor()
{
	// DRAREABR is inclusive, hence the +1 before scaling.
	const int right = (int)(m_env.DRAREABR.X + 1) << m_scale.cx;
	const int bottom = (int)(m_env.DRAREABR.Y + 1) << m_scale.cy;

	GSVector4i rc;

	rc.x = (int)m_env.DRAREATL.X << m_scale.cx;
	rc.y = (int)m_env.DRAREATL.Y << m_scale.cy;
	rc.z = min(right, m_mem.GetWidth());
	rc.w = min(bottom, m_mem.GetHeight());

	return rc;
}
void Draw()
{
const void* texture = NULL;
@@ -122,51 +136,52 @@ protected:
//
GPUDrawScanline* ds = (GPUDrawScanline*)m_rst->GetDrawScanline();
ds->SetOptions(m_filter, m_dither);
ds->SetOptions(m_filter, m_dither);
ds->SetupDraw(m_vertices, m_count, texture);
//
GSVector4i scissor;
*m_sync = 0;
scissor.x = m_env.DRAREATL.X << 1;
scissor.y = m_env.DRAREATL.Y << 1;
scissor.z = min((m_env.DRAREABR.X + 1) << 1, 1024 << 1);
scissor.w = min((m_env.DRAREABR.Y + 1) << 1, 512 << 1);
POSITION pos = m_rmt.GetHeadPosition();
//
int prims = 0;
switch(m_env.PRIM.TYPE)
while(pos)
{
case GPU_POLYGON:
ASSERT(!(m_count % 3));
prims = m_count / 3;
for(int i = 0, j = m_count; i < j; i += 3) m_rst->DrawTriangle(&m_vertices[i], scissor);
break;
case GPU_LINE:
ASSERT(!(m_count & 1));
prims = m_count / 2;
for(int i = 0, j = m_count; i < j; i += 2) m_rst->DrawLine(&m_vertices[i], scissor);
break;
case GPU_SPRITE:
ASSERT(!(m_count & 1));
prims = m_count / 2;
for(int i = 0, j = m_count; i < j; i += 2) m_rst->DrawSprite(&m_vertices[i], scissor, false);
break;
default:
__assume(0);
GSRasterizerMT* r = m_rmt.GetNext(pos);
GPUDrawScanline* ds = (GPUDrawScanline*)r->GetDrawScanline();
ds->SetOptions(m_filter, m_dither);
ds->SetupDraw(m_vertices, m_count, texture);
r->Draw();
}
// 1st thread is this thread
int prims = DrawAsync(m_rst);
// wait for the other threads to finish
while(*m_sync)
{
_mm_pause();
}
m_perfmon.Put(GSPerfMon::Prim, prims);
m_perfmon.Put(GSPerfMon::Draw, 1);
{
int pixels = m_rst->GetPixels();
POSITION pos = m_rmt.GetHeadPosition();
while(pos)
{
pixels += m_rmt.GetNext(pos)->GetPixels();
}
m_perfmon.Put(GSPerfMon::Fillrate, pixels);
}
@@ -184,25 +199,63 @@ protected:
br = br.maxv(p);
}
GSVector4i scissor = GetScissor();
CRect r;
r.left = max(scissor.x, min(scissor.z, (int)tl.x)) >> 1;
r.top = max(scissor.y, min(scissor.w, (int)tl.y)) >> 1;
r.right = max(scissor.x, min(scissor.z, (int)br.x)) >> 1;
r.bottom = max(scissor.y, min(scissor.w, (int)br.y)) >> 1;
r.left = max(scissor.x, min(scissor.z, (int)tl.x)) >> m_scale.cx;
r.top = max(scissor.y, min(scissor.w, (int)tl.y)) >> m_scale.cy;
r.right = max(scissor.x, min(scissor.z, (int)br.x)) >> m_scale.cx;
r.bottom = max(scissor.y, min(scissor.w, (int)br.y)) >> m_scale.cy;
Invalidate(r);
}
}
// Rasterizes every queued vertex group with the given rasterizer and returns
// the number of primitives implied by the vertex count. Draw() calls this
// directly for m_rst; the worker rasterizers are constructed with `this`
// as their IDrawAsync, presumably to call it from their own threads.
int DrawAsync(GSRasterizer* r)
{
	GSVector4i clip = GetScissor();

	const int count = m_count;

	int prims = 0;

	switch(m_env.PRIM.TYPE)
	{
	case GPU_POLYGON:
		// three vertices per triangle
		ASSERT(!(m_count % 3));
		prims = count / 3;
		for(int i = 0; i < count; i += 3)
		{
			r->DrawTriangle(&m_vertices[i], clip);
		}
		break;

	case GPU_LINE:
		// two vertices per line
		ASSERT(!(m_count & 1));
		prims = count / 2;
		for(int i = 0; i < count; i += 2)
		{
			r->DrawLine(&m_vertices[i], clip);
		}
		break;

	case GPU_SPRITE:
		// two vertices (opposite corners) per sprite
		ASSERT(!(m_count & 1));
		prims = count / 2;
		for(int i = 0; i < count; i += 2)
		{
			r->DrawSprite(&m_vertices[i], clip, false);
		}
		break;

	default:
		// no other primitive type is expected here
		__assume(0);
	}

	return prims;
}
public:
GPURendererSW(const GPURendererSettings& rs)
: GPURenderer(rs)
{
m_threads = 1;
m_sync = (long*)_aligned_malloc(sizeof(*m_sync), 128); // get a whole cache line
m_threads = AfxGetApp()->GetProfileInt(_T("GPUSettings"), _T("swthreads"), 1);
m_rst = new GSRasterizer(new GPUDrawScanline(this, m_filter, m_dither), 0, m_threads);
for(int i = 1; i < m_threads; i++)
{
m_rmt.AddTail(new GSRasterizerMT(new GPUDrawScanline(this, m_filter, m_dither), i, m_threads, this, m_sync));
}
m_fpDrawingKickHandlers[GPU_POLYGON] = (DrawingKickHandler)&GPURendererSW::DrawingKickTriangle;
m_fpDrawingKickHandlers[GPU_LINE] = (DrawingKickHandler)&GPURendererSW::DrawingKickLine;
m_fpDrawingKickHandlers[GPU_SPRITE] = (DrawingKickHandler)&GPURendererSW::DrawingKickSprite;
@@ -211,5 +264,10 @@ public:
// Releases the rasterizers and the synchronization word owned by this renderer.
virtual ~GPURendererSW()
{
	delete m_rst;

	// Destroy the worker rasterizers first: each one was constructed with
	// the m_sync pointer, so it must stay valid until they are all gone.
	while(!m_rmt.IsEmpty())
	{
		delete m_rmt.RemoveHead();
	}

	// m_sync is allocated with _aligned_malloc in the constructor and was
	// never released; it has to be paired with _aligned_free (not free/delete).
	_aligned_free(m_sync);
	m_sync = NULL;
}
};

301
gsdx/GPUSettingsDlg.cpp Normal file
View File

@@ -0,0 +1,301 @@
/*
* Copyright (C) 2007 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSdx.h"
#include "GSUtil.h"
#include "GPUSettingsDlg.h"
#include <shlobj.h>
#include <afxpriv.h>
// Option tables for the GPU settings dialog. Each entry is {id, name, note};
// the id is what gets stored as combo box item data and written to the profile.

// Renderer choices. The ids match the "Renderer" values switched on in GPUopen
// (1 = GSDevice9, 2 = GSDevice10); the commented-out entries are not implemented yet.
GSSetting GPUSettingsDlg::g_renderers[] =
{
// {0, _T("Direct3D7 (Software)"), NULL},
{1, _T("Direct3D9 (Software)"), NULL},
{2, _T("Direct3D10 (Software)"), NULL},
// {3, _T("Null (Null)"), NULL},
};
// Pixel shader versions; the id is the D3DPS_VERSION value. OnInitDialog only
// lists entries whose id does not exceed the device's reported PixelShaderVersion.
GSSetting GPUSettingsDlg::g_psversion[] =
{
{D3DPS_VERSION(3, 0), _T("Pixel Shader 3.0"), NULL},
{D3DPS_VERSION(2, 0), _T("Pixel Shader 2.0"), NULL},
//{D3DPS_VERSION(1, 4), _T("Pixel Shader 1.4"), NULL},
//{D3DPS_VERSION(1, 1), _T("Pixel Shader 1.1"), NULL},
//{D3DPS_VERSION(0, 0), _T("Fixed Pipeline (bogus)"), NULL},
};
// Texture filter modes, stored under GPUSettings/filter.
GSSetting GPUSettingsDlg::g_filter[] =
{
{0, _T("Nearest"), NULL},
{1, _T("Bilinear (polygons only)"), NULL},
{2, _T("Bilinear"), NULL},
};
// Dithering modes, stored under GPUSettings/dithering.
GSSetting GPUSettingsDlg::g_dithering[] =
{
{0, _T("Disabled"), NULL},
{1, _T("Auto"), NULL},
};
// Aspect ratio modes, stored under GPUSettings/AspectRatio.
GSSetting GPUSettingsDlg::g_aspectratio[] =
{
{0, _T("Stretch"), NULL},
{1, _T("4:3"), NULL},
{2, _T("16:9"), NULL},
};
// Internal resolution presets. The id packs two shift amounts:
// bits 0-1 = horizontal (scale_x), bits 2-3 = vertical (scale_y);
// OnOK unpacks them with (value & 3) and ((value >> 2) & 3).
// A shift of 1 doubles the dimension, a shift of 2 quadruples it.
GSSetting GPUSettingsDlg::g_internalresolution[] =
{
{0 | (0 << 2), _T("H x 1 - V x 1"), NULL},
{1 | (0 << 2), _T("H x 2 - V x 1"), NULL},
{0 | (1 << 2), _T("H x 1 - V x 2"), NULL},
{1 | (1 << 2), _T("H x 2 - V x 2"), NULL},
{2 | (1 << 2), _T("H x 4 - V x 2"), NULL},
{1 | (2 << 2), _T("H x 2 - V x 4"), NULL},
{2 | (2 << 2), _T("H x 4 - V x 4"), NULL},
};
// MFC runtime class information for the dialog (pairs with DECLARE_DYNAMIC in the header).
IMPLEMENT_DYNAMIC(GPUSettingsDlg, CDialog)
// Creates the dialog object from the IDD_GPUCONFIG template; pParent may be NULL.
// Controls are populated later, in OnInitDialog.
GPUSettingsDlg::GPUSettingsDlg(CWnd* pParent /*=NULL*/)
: CDialog(GPUSettingsDlg::IDD, pParent)
{
}
// Nothing to release explicitly; members clean up after themselves.
GPUSettingsDlg::~GPUSettingsDlg()
{
}
// Forwards every message to the base class and, once WM_INITDIALOG has been
// handled, sends WM_KICKIDLE to the dialog so that OnKickIdle runs the
// ON_UPDATE_COMMAND_UI handlers for the initial control state.
LRESULT GPUSettingsDlg::DefWindowProc(UINT message, WPARAM wParam, LPARAM lParam)
{
	const LRESULT result = __super::DefWindowProc(message, wParam, lParam);

	if(WM_INITDIALOG == message)
	{
		SendMessage(WM_KICKIDLE);
	}

	return result;
}
// Binds the dialog's control member variables to the controls of the
// IDD_GPUCONFIG template. Only control bindings are set up here; values are
// read from and written to the profile in OnInitDialog / OnOK.
void GPUSettingsDlg::DoDataExchange(CDataExchange* pDX)
{
__super::DoDataExchange(pDX);
// Note the combo ids are not in visual order: COMBO3 is the resolution list,
// COMBO1 the renderer list.
DDX_Control(pDX, IDC_COMBO3, m_resolution);
DDX_Control(pDX, IDC_COMBO1, m_renderer);
DDX_Control(pDX, IDC_COMBO4, m_psversion);
DDX_Control(pDX, IDC_COMBO2, m_filter);
DDX_Control(pDX, IDC_COMBO5, m_dithering);
DDX_Control(pDX, IDC_COMBO6, m_aspectratio);
DDX_Control(pDX, IDC_COMBO7, m_internalresolution);
// Rendering thread count: spin button plus its edit box.
DDX_Control(pDX, IDC_SPIN3, m_swthreads);
DDX_Control(pDX, IDC_EDIT3, m_swthreadsedit);
}
// Message routing for the dialog.
// WM_KICKIDLE drives the ON_UPDATE_COMMAND_UI handlers (see OnKickIdle), which
// enable or disable controls depending on the selected renderer.
// NOTE: OnUpdateResolution is declared and defined but has no entry here.
BEGIN_MESSAGE_MAP(GPUSettingsDlg, CDialog)
ON_MESSAGE_VOID(WM_KICKIDLE, OnKickIdle)
ON_UPDATE_COMMAND_UI(IDC_COMBO4, OnUpdateD3D9Options)
ON_UPDATE_COMMAND_UI(IDC_COMBO7, OnUpdateSWOptions)
ON_UPDATE_COMMAND_UI(IDC_SPIN3, OnUpdateSWOptions)
ON_UPDATE_COMMAND_UI(IDC_EDIT3, OnUpdateSWOptions)
ON_CBN_SELCHANGE(IDC_COMBO1, &GPUSettingsDlg::OnCbnSelchangeCombo1)
END_MESSAGE_MAP()
// WM_KICKIDLE handler: runs the ON_UPDATE_COMMAND_UI handlers for the dialog's
// controls so their enabled state follows the current renderer selection.
void GPUSettingsDlg::OnKickIdle()
{
UpdateDialogControls(this, false);
}
// Populates every control of the dialog from the profile:
//  - the resolution list ("Windowed" plus every X8R8G8B8 mode of the default adapter),
//  - the renderer, shader, filter, dithering, aspect ratio and internal resolution combos,
//  - the rendering thread spin button (range 1..16).
// Returns TRUE so that the framework sets the default focus.
BOOL GPUSettingsDlg::OnInitDialog()
{
	__super::OnInitDialog();

	CWinApp* pApp = AfxGetApp();

	// If Direct3D9 is unavailable the caps stay zeroed, i.e. pixel shader version 0.0.
	D3DCAPS9 caps;
	memset(&caps, 0, sizeof(caps));
	caps.PixelShaderVersion = D3DPS_VERSION(0, 0);

	m_modes.RemoveAll();

	// windowed

	{
		// An all-zero mode stands for "Windowed"; it is selected by default.
		D3DDISPLAYMODE mode;
		memset(&mode, 0, sizeof(mode));
		m_modes.AddTail(mode);

		int iItem = m_resolution.AddString(_T("Windowed"));
		m_resolution.SetItemDataPtr(iItem, m_modes.GetTailPosition());
		m_resolution.SetCurSel(iItem);
	}

	// fullscreen

	// Attach instead of assigning: Direct3DCreate9 already returns a referenced
	// object and CComPtr's raw-pointer constructor would add a second reference
	// that is never released.
	CComPtr<IDirect3D9> d3d;
	d3d.Attach(Direct3DCreate9(D3D_SDK_VERSION));

	if(d3d)
	{
		UINT ModeWidth = pApp->GetProfileInt(_T("Settings"), _T("ModeWidth"), 0);
		UINT ModeHeight = pApp->GetProfileInt(_T("Settings"), _T("ModeHeight"), 0);
		UINT ModeRefreshRate = pApp->GetProfileInt(_T("Settings"), _T("ModeRefreshRate"), 0);

		UINT nModes = d3d->GetAdapterModeCount(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8);

		for(UINT i = 0; i < nModes; i++)
		{
			D3DDISPLAYMODE mode;

			if(S_OK == d3d->EnumAdapterModes(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8, i, &mode))
			{
				CString str;
				str.Format(_T("%dx%d %dHz"), mode.Width, mode.Height, mode.RefreshRate);
				int iItem = m_resolution.AddString(str);

				// The item data is the list position of the mode; OnOK reads it back.
				m_modes.AddTail(mode);
				m_resolution.SetItemDataPtr(iItem, m_modes.GetTailPosition());

				if(ModeWidth == mode.Width && ModeHeight == mode.Height && ModeRefreshRate == mode.RefreshRate)
				{
					m_resolution.SetCurSel(iItem);
				}
			}
		}

		d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &caps);
	}

	bool isdx10avail = GSUtil::IsDirect3D10Available();

	CAtlArray<GSSetting> renderers;

	for(size_t i = 0; i < countof(g_renderers); i++)
	{
		// Filter by renderer id, not by table index: the table has only two
		// entries, so an index test against 2 would never hide the
		// Direct3D10 renderer (id 2).
		if(g_renderers[i].id == 2 && !isdx10avail) continue;

		renderers.Add(g_renderers[i]);
	}

	GSSetting::InitComboBox(renderers.GetData(), renderers.GetCount(), m_renderer, pApp->GetProfileInt(_T("GPUSettings"), _T("Renderer"), 1));

	// The stored renderer may have been filtered out above (or be unknown);
	// fall back to the first entry so the dialog never starts without a selection.
	if(m_renderer.GetCurSel() < 0 && m_renderer.GetCount() > 0)
	{
		m_renderer.SetCurSel(0);
	}

	GSSetting::InitComboBox(g_psversion, countof(g_psversion), m_psversion, pApp->GetProfileInt(_T("Settings"), _T("PixelShaderVersion2"), D3DPS_VERSION(2, 0)), caps.PixelShaderVersion);
	GSSetting::InitComboBox(g_filter, countof(g_filter), m_filter, pApp->GetProfileInt(_T("GPUSettings"), _T("filter"), 0));
	GSSetting::InitComboBox(g_dithering, countof(g_dithering), m_dithering, pApp->GetProfileInt(_T("GPUSettings"), _T("dithering"), 1));
	GSSetting::InitComboBox(g_aspectratio, countof(g_aspectratio), m_aspectratio, pApp->GetProfileInt(_T("GPUSettings"), _T("AspectRatio"), 1));
	// scale_x / scale_y are packed the same way as the ids of g_internalresolution.
	GSSetting::InitComboBox(g_internalresolution, countof(g_internalresolution), m_internalresolution, pApp->GetProfileInt(_T("GPUSettings"), _T("scale_x"), 0) | (pApp->GetProfileInt(_T("GPUSettings"), _T("scale_y"), 0) << 2));

	// show the logo that belongs to the selected renderer
	OnCbnSelchangeCombo1();

	//

	m_swthreads.SetRange(1, 16);
	m_swthreads.SetPos(pApp->GetProfileInt(_T("GPUSettings"), _T("swthreads"), 1));

	//

	UpdateData(FALSE);

	return TRUE;  // return TRUE unless you set the focus to a control
	// EXCEPTION: OCX Property Pages should return FALSE
}
void GPUSettingsDlg::OnOK()
{
CWinApp* pApp = AfxGetApp();
UpdateData();
if(m_resolution.GetCurSel() >= 0)
{
D3DDISPLAYMODE& mode = m_modes.GetAt((POSITION)m_resolution.GetItemData(m_resolution.GetCurSel()));
pApp->WriteProfileInt(_T("Settings"), _T("ModeWidth"), mode.Width);
pApp->WriteProfileInt(_T("Settings"), _T("ModeHeight"), mode.Height);
pApp->WriteProfileInt(_T("Settings"), _T("ModeRefreshRate"), mode.RefreshRate);
}
if(m_renderer.GetCurSel() >= 0)
{
pApp->WriteProfileInt(_T("GPUSettings"), _T("Renderer"), (DWORD)m_renderer.GetItemData(m_renderer.GetCurSel()));
}
if(m_psversion.GetCurSel() >= 0)
{
pApp->WriteProfileInt(_T("Settings"), _T("PixelShaderVersion2"), (DWORD)m_psversion.GetItemData(m_psversion.GetCurSel()));
}
if(m_filter.GetCurSel() >= 0)
{
pApp->WriteProfileInt(_T("GPUSettings"), _T("filter"), (DWORD)m_filter.GetItemData(m_filter.GetCurSel()));
}
if(m_dithering.GetCurSel() >= 0)
{
pApp->WriteProfileInt(_T("GPUSettings"), _T("dithering"), (DWORD)m_dithering.GetItemData(m_dithering.GetCurSel()));
}
if(m_aspectratio.GetCurSel() >= 0)
{
pApp->WriteProfileInt(_T("GPUSettings"), _T("AspectRatio"), (DWORD)m_aspectratio.GetItemData(m_aspectratio.GetCurSel()));
}
if(m_internalresolution.GetCurSel() >= 0)
{
DWORD value = (DWORD)m_internalresolution.GetItemData(m_internalresolution.GetCurSel());
pApp->WriteProfileInt(_T("GPUSettings"), _T("scale_x"), value & 3);
pApp->WriteProfileInt(_T("GPUSettings"), _T("scale_y"), (value >> 2) & 3);
}
pApp->WriteProfileInt(_T("GPUSettings"), _T("swthreads"), m_swthreads.GetPos());
__super::OnOK();
}
// Update-UI handler: enables the target control only while the renderer with
// id 1 (Direct3D9) is selected. With no selection the id reads as -1 and the
// control is disabled.
void GPUSettingsDlg::OnUpdateResolution(CCmdUI* pCmdUI)
{
	UpdateData();

	const int selected = m_renderer.GetCurSel();
	const int id = (int)m_renderer.GetItemData(selected);

	pCmdUI->Enable(id == 1);
}
// Update-UI handler for options that only apply to the Direct3D9 renderer
// (id 1): the control is enabled while that renderer is selected.
void GPUSettingsDlg::OnUpdateD3D9Options(CCmdUI* pCmdUI)
{
	const int selected = m_renderer.GetCurSel();
	const int id = (int)m_renderer.GetItemData(selected);

	pCmdUI->Enable(id == 1);
}
// Update-UI handler for the software renderer options: the control is enabled
// while the selected renderer id lies in the range 0..2.
void GPUSettingsDlg::OnUpdateSWOptions(CCmdUI* pCmdUI)
{
	const int selected = m_renderer.GetCurSel();
	const int id = (int)m_renderer.GetItemData(selected);

	pCmdUI->Enable(!(id < 0 || id > 2));
}
// Renderer selection changed: show the logo bitmap that belongs to the
// selected renderer (id 1 -> IDC_LOGO9, id 2 -> IDC_LOGO10) and hide the other.
void GPUSettingsDlg::OnCbnSelchangeCombo1()
{
	const int id = (int)m_renderer.GetItemData(m_renderer.GetCurSel());

	const int showLogo9 = (id == 1) ? SW_SHOW : SW_HIDE;
	const int showLogo10 = (id == 2) ? SW_SHOW : SW_HIDE;

	GetDlgItem(IDC_LOGO9)->ShowWindow(showLogo9);
	GetDlgItem(IDC_LOGO10)->ShowWindow(showLogo10);
}

71
gsdx/GPUSettingsDlg.h Normal file
View File

@@ -0,0 +1,71 @@
/*
* Copyright (C) 2007 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSSetting.h"
#include "resource.h"
// Settings dialog of the GPU plugin (dialog template IDD_GPUCONFIG).
// OnInitDialog fills the controls from the application profile and OnOK
// writes the selections back.
class GPUSettingsDlg : public CDialog
{
DECLARE_DYNAMIC(GPUSettingsDlg)
private:
// Display modes backing the resolution combo box; each combo item stores the
// list POSITION of its mode as item data.
CAtlList<D3DDISPLAYMODE> m_modes;
public:
GPUSettingsDlg(CWnd* pParent = NULL); // standard constructor
virtual ~GPUSettingsDlg();
// Option tables ({id, name, note}) used to populate the combo boxes.
static GSSetting g_renderers[];
static GSSetting g_psversion[];
static GSSetting g_filter[];
static GSSetting g_dithering[];
static GSSetting g_aspectratio[];
static GSSetting g_internalresolution[];
// Dialog Data
enum { IDD = IDD_GPUCONFIG };
// Controls, bound in DoDataExchange.
CComboBox m_resolution;
CComboBox m_renderer;
CComboBox m_psversion;
CComboBox m_filter;
CComboBox m_dithering;
CComboBox m_aspectratio;
CComboBox m_internalresolution;
// Rendering thread count: spin button and its edit box.
CSpinButtonCtrl m_swthreads;
CEdit m_swthreadsedit;
protected:
virtual LRESULT DefWindowProc(UINT message, WPARAM wParam, LPARAM lParam);
virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
virtual BOOL OnInitDialog();
virtual void OnOK();
DECLARE_MESSAGE_MAP()
public:
// Message handlers. The OnUpdate* handlers enable or disable a control
// depending on the selected renderer.
afx_msg void OnKickIdle();
afx_msg void OnUpdateResolution(CCmdUI* pCmdUI);
afx_msg void OnUpdateD3D9Options(CCmdUI* pCmdUI);
afx_msg void OnUpdateSWOptions(CCmdUI* pCmdUI);
afx_msg void OnCbnSelchangeCombo1();
};

View File

@@ -22,8 +22,9 @@
#include "stdafx.h"
#include "GPUState.h"
GPUState::GPUState()
: s_n(0)
GPUState::GPUState(const CSize& scale)
: m_mem(scale)
, s_n(0)
{
memset(m_status, 0, sizeof(m_status));

View File

@@ -97,7 +97,7 @@ protected:
if(inc) s_n++;
//if(s_n < 133) return;
//if(s_n < 86) return;
int dir = 1;
#ifdef DEBUG
@@ -121,7 +121,7 @@ public:
UINT32 m_status[256];
public:
GPUState();
GPUState(const CSize& scale);
virtual ~GPUState();
virtual void Reset();

View File

@@ -932,6 +932,8 @@ void GSDrawScanline::Init()
InitDS();
#ifdef FAST_DRAWSCANLINE
// ffx
m_dsmap.SetAt(0x2420c265, (DrawScanlinePtr)&GSDrawScanline::DrawScanlineExT<0x2420c265>);
@@ -2153,6 +2155,8 @@ void GSDrawScanline::Init()
m_dsmap.SetAt(0xa4802c09, (DrawScanlinePtr)&GSDrawScanline::DrawScanlineExT<0xa4802c09>);
m_dsmap.SetAt(0xa485bc29, (DrawScanlinePtr)&GSDrawScanline::DrawScanlineExT<0xa485bc29>);
m_dsmap.SetAt(0xe441bc29, (DrawScanlinePtr)&GSDrawScanline::DrawScanlineExT<0xe441bc29>);
#endif
/*
// dmc (fixme)
@@ -2216,7 +2220,7 @@ void GSDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex& v
continue;
}
int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4)));
int pixels = GSVector4i::min_i16(steps, 4);
GSVector4 c[12];
@@ -2442,7 +2446,7 @@ void GSDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex&
continue;
}
int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4)));
int pixels = GSVector4i::min_i16(steps, 4);
GSVector4 c[12];

View File

@@ -489,10 +489,8 @@ GSRasterizerMT::~GSRasterizerMT()
}
}
void GSRasterizerMT::Run(Vertex* vertices, int count, const void* texture)
void GSRasterizerMT::Draw()
{
m_ds->SetupDraw(vertices, count, texture);
InterlockedBitTestAndSet(m_sync, m_id);
}

View File

@@ -23,6 +23,8 @@
#include "GSVertexSW.h"
#define FAST_DRAWSCANLINE
class IDrawScanline
{
public:
@@ -89,5 +91,5 @@ public:
GSRasterizerMT(IDrawScanline* ds, int id, int threads, IDrawAsync* da, long* sync);
virtual ~GSRasterizerMT();
void Run(Vertex* vertices, int count, const void* texture);
void Draw();
};

View File

@@ -353,8 +353,8 @@ public:
_T("%I64d | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d/%d | %d%% CPU | %.2f | %.2f"),
m_perfmon.GetFrame(), GetDisplaySize().cx, GetDisplaySize().cy, fps, (int)(100.0 * fps / GetFPS()),
SMODE2->INT ? (CString(_T("Interlaced ")) + (SMODE2->FFMD ? _T("(frame)") : _T("(field)"))) : _T("Progressive"),
g_interlace[m_interlace].name,
g_aspectratio[m_aspectratio].name,
GSSettingsDlg::g_interlace[m_interlace].name,
GSSettingsDlg::g_aspectratio[m_aspectratio].name,
(int)m_perfmon.Get(GSPerfMon::Prim),
(int)m_perfmon.Get(GSPerfMon::Draw),
m_perfmon.CPU(),

View File

@@ -221,6 +221,17 @@ protected:
}
}
// Returns the scissor rectangle of the current drawing context, with its
// right edge limited to the frame buffer width (FBW is in units of 64 pixels).
GSVector4i GetScissor()
{
	GSVector4i rc(m_context->scissor.in);

	// TODO: find a game that overflows and check which one is the right behaviour

	const int fbwidth = (int)m_context->FRAME.FBW * 64;

	rc.z = min(rc.z, fbwidth);

	return rc;
}
void Draw()
{
// TODO: lot to optimize here
@@ -311,7 +322,9 @@ protected:
//
m_rst->GetDrawScanline()->SetupDraw(m_vertices, m_count, texture);
GSDrawScanline* ds = (GSDrawScanline*)m_rst->GetDrawScanline();
ds->SetupDraw(m_vertices, m_count, texture);
//
@@ -321,7 +334,13 @@ protected:
while(pos)
{
m_rmt.GetNext(pos)->Run(m_vertices, m_count, texture);
GSRasterizerMT* r = m_rmt.GetNext(pos);
GSDrawScanline* ds = (GSDrawScanline*)r->GetDrawScanline();
ds->SetupDraw(m_vertices, m_count, texture);
r->Draw();
}
// 1st thread is this thread
@@ -365,11 +384,7 @@ protected:
br = br.maxv(p);
}
GSVector4i scissor(context->scissor.in);
// TODO: find a game that overflows and check which one is the right behaviour
scissor.z = min(scissor.z, (int)context->FRAME.FBW * 64);
GSVector4i scissor = GetScissor();
CRect r;
@@ -409,12 +424,6 @@ protected:
{
GSDrawingContext* context = m_context;
GSVector4i scissor(context->scissor.in);
// TODO: find a game that overflows and check which one is the right behaviour
scissor.z = min(scissor.z, (int)context->FRAME.FBW * 64);
//
bool solid = true;
@@ -431,6 +440,8 @@ protected:
//
GSVector4i scissor = GetScissor();
int prims = 0;
switch(PRIM->PRIM)

View File

@@ -26,4 +26,20 @@ struct GSSetting
DWORD id;
const TCHAR* name;
const TCHAR* note;
static void InitComboBox(const GSSetting* settings, int count, CComboBox& combobox, DWORD sel, DWORD maxid = ~0)
{
for(int i = 0; i < count; i++)
{
if(settings[i].id <= maxid)
{
CString str = settings[i].name;
if(settings[i].note != NULL) str = str + _T(" (") + settings[i].note + _T(")");
int item = combobox.AddString(str);
combobox.SetItemData(item, settings[i].id);
if(settings[i].id == sel) combobox.SetCurSel(item);
}
}
}
};

View File

@@ -22,10 +22,11 @@
#include "stdafx.h"
#include "GSdx.h"
#include "GSSettingsDlg.h"
#include "GSUtil.h"
#include <shlobj.h>
#include <afxpriv.h>
GSSetting g_renderers[] =
GSSetting GSSettingsDlg::g_renderers[] =
{
{0, _T("Direct3D9 (Hardware)"), NULL},
{1, _T("Direct3D9 (Software)"), NULL},
@@ -37,7 +38,7 @@ GSSetting g_renderers[] =
{7, _T("Null (Null)"), NULL},
};
GSSetting g_psversion[] =
GSSetting GSSettingsDlg::g_psversion[] =
{
{D3DPS_VERSION(3, 0), _T("Pixel Shader 3.0"), NULL},
{D3DPS_VERSION(2, 0), _T("Pixel Shader 2.0"), NULL},
@@ -46,7 +47,7 @@ GSSetting g_psversion[] =
//{D3DPS_VERSION(0, 0), _T("Fixed Pipeline (bogus)"), NULL},
};
GSSetting g_interlace[] =
GSSetting GSSettingsDlg::g_interlace[] =
{
{0, _T("None"), NULL},
{1, _T("Weave tff"), _T("saw-tooth")},
@@ -57,7 +58,7 @@ GSSetting g_interlace[] =
{6, _T("Blend bff"), _T("slight blur, 1/2 fps")},
};
GSSetting g_aspectratio[] =
GSSetting GSSettingsDlg::g_aspectratio[] =
{
{0, _T("Stretch"), NULL},
{1, _T("4:3"), NULL},
@@ -81,21 +82,6 @@ GSSettingsDlg::~GSSettingsDlg()
{
}
void GSSettingsDlg::InitComboBox(CComboBox& combobox, const GSSetting* settings, int count, DWORD sel, DWORD maxid)
{
for(int i = 0; i < count; i++)
{
if(settings[i].id <= maxid)
{
CString str = settings[i].name;
if(settings[i].note != NULL) str = str + _T(" (") + settings[i].note + _T(")");
int item = combobox.AddString(str);
combobox.SetItemData(item, settings[i].id);
if(settings[i].id == sel) combobox.SetCurSel(item);
}
}
}
LRESULT GSSettingsDlg::DefWindowProc(UINT message, WPARAM wParam, LPARAM lParam)
{
LRESULT ret = __super::DefWindowProc(message, wParam, lParam);
@@ -151,18 +137,6 @@ void GSSettingsDlg::OnKickIdle()
UpdateDialogControls(this, false);
}
static bool IsDirect3D10Available()
{
if(HMODULE hModule = LoadLibrary(_T("d3d10.dll")))
{
FreeLibrary(hModule);
return true;
}
return false;
}
BOOL GSSettingsDlg::OnInitDialog()
{
__super::OnInitDialog();
@@ -220,7 +194,7 @@ BOOL GSSettingsDlg::OnInitDialog()
d3d->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &caps);
}
bool isdx10avail = IsDirect3D10Available();
bool isdx10avail = GSUtil::IsDirect3D10Available();
CAtlArray<GSSetting> renderers;
@@ -231,10 +205,10 @@ BOOL GSSettingsDlg::OnInitDialog()
renderers.Add(g_renderers[i]);
}
InitComboBox(m_renderer, renderers.GetData(), renderers.GetCount(), pApp->GetProfileInt(_T("Settings"), _T("Renderer"), 0));
InitComboBox(m_psversion, g_psversion, countof(g_psversion), pApp->GetProfileInt(_T("Settings"), _T("PixelShaderVersion2"), D3DPS_VERSION(2, 0)), caps.PixelShaderVersion);
InitComboBox(m_interlace, g_interlace, countof(g_interlace), pApp->GetProfileInt(_T("Settings"), _T("Interlace"), 0));
InitComboBox(m_aspectratio, g_aspectratio, countof(g_aspectratio), pApp->GetProfileInt(_T("Settings"), _T("AspectRatio"), 1));
GSSetting::InitComboBox(renderers.GetData(), renderers.GetCount(), m_renderer, pApp->GetProfileInt(_T("Settings"), _T("Renderer"), 0));
GSSetting::InitComboBox(g_psversion, countof(g_psversion), m_psversion, pApp->GetProfileInt(_T("Settings"), _T("PixelShaderVersion2"), D3DPS_VERSION(2, 0)), caps.PixelShaderVersion);
GSSetting::InitComboBox(g_interlace, countof(g_interlace), m_interlace, pApp->GetProfileInt(_T("Settings"), _T("Interlace"), 0));
GSSetting::InitComboBox(g_aspectratio, countof(g_aspectratio), m_aspectratio, pApp->GetProfileInt(_T("Settings"), _T("AspectRatio"), 1));
OnCbnSelchangeCombo1();

View File

@@ -24,11 +24,6 @@
#include "GSSetting.h"
#include "resource.h"
extern GSSetting g_renderers[];
extern GSSetting g_psversion[];
extern GSSetting g_interlace[];
extern GSSetting g_aspectratio[];
class GSSettingsDlg : public CDialog
{
DECLARE_DYNAMIC(GSSettingsDlg)
@@ -36,12 +31,15 @@ class GSSettingsDlg : public CDialog
private:
CAtlList<D3DDISPLAYMODE> m_modes;
void InitComboBox(CComboBox& combobox, const GSSetting* settings, int count, DWORD sel, DWORD maxid = ~0);
public:
GSSettingsDlg(CWnd* pParent = NULL); // standard constructor
virtual ~GSSettingsDlg();
static GSSetting g_renderers[];
static GSSetting g_psversion[];
static GSSetting g_interlace[];
static GSSetting g_aspectratio[];
// Dialog Data
enum { IDD = IDD_CONFIG };
CComboBox m_resolution;

View File

@@ -224,6 +224,18 @@ bool GSUtil::CheckSSE()
return true;
}
bool GSUtil::IsDirect3D10Available()
{
if(HMODULE hModule = LoadLibrary(_T("d3d10.dll")))
{
FreeLibrary(hModule);
return true;
}
return false;
}
char* GSUtil::GetLibName()
{
CString str;

View File

@@ -62,6 +62,8 @@ public:
static bool CheckDirectX();
static bool CheckSSE();
static bool IsDirect3D10Available();
static char* GetLibName();
};

View File

@@ -324,6 +324,11 @@ public:
#endif
// Scalar convenience wrapper: loads a and b into vectors, applies the vector
// min_i16 and stores the result back to an int. Replaces the spelled-out
// store(load(..).min_i16(load(..))) expression at the call sites.
// NOTE(review): the comparison is presumably done on signed 16-bit lanes, so
// it is only meaningful for values that fit in 16 bits - confirm.
static int min_i16(int a, int b)
{
return store(load(a).min_i16(load(b)));
}
GSVector4i blend8(const GSVector4i& a, const GSVector4i& mask) const
{
return GSVector4i(_mm_blendv_epi8(m, a, mask));

View File

@@ -134,6 +134,34 @@ BEGIN
DEFPUSHBUTTON "OK",IDOK,221,47,50,14
END
IDD_GPUCONFIG DIALOGEX 0, 0, 189, 235
STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
CAPTION "Settings..."
FONT 8, "MS Shell Dlg", 400, 0, 0x1
BEGIN
CONTROL 2021,IDC_LOGO9,"Static",SS_BITMAP,7,7,175,44
LTEXT "Resolution:",IDC_STATIC,7,59,37,8
COMBOBOX IDC_COMBO3,78,57,104,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Renderer:",IDC_STATIC,7,74,34,8
COMBOBOX IDC_COMBO1,78,72,104,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Shader:",IDC_STATIC,7,89,26,8
COMBOBOX IDC_COMBO4,78,87,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Texture Filter (Del):",IDC_STATIC,7,105,64,8
COMBOBOX IDC_COMBO2,78,102,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Dithering (End):",IDC_STATIC,7,120,52,8
COMBOBOX IDC_COMBO5,78,117,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Aspect Ratio (PgDn):",IDC_STATIC,7,135,68,8
COMBOBOX IDC_COMBO6,78,132,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
LTEXT "Rendering Threads:",IDC_STATIC,7,165,64,8
EDITTEXT IDC_EDIT3,78,163,35,13,ES_AUTOHSCROLL | ES_NUMBER
CONTROL "",IDC_SPIN3,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,169,11,14
DEFPUSHBUTTON "OK",IDOK,43,214,50,14
PUSHBUTTON "Cancel",IDCANCEL,96,214,50,14
CONTROL 2022,IDC_LOGO10,"Static",SS_BITMAP,7,7,175,44
LTEXT "Internal Resolution:",IDC_STATIC,7,150,64,8
COMBOBOX IDC_COMBO7,78,147,104,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
END
/////////////////////////////////////////////////////////////////////////////
//
@@ -161,6 +189,16 @@ BEGIN
VERTGUIDE, 271
HORZGUIDE, 54
END
IDD_GPUCONFIG, DIALOG
BEGIN
LEFTMARGIN, 7
RIGHTMARGIN, 182
VERTGUIDE, 78
VERTGUIDE, 182
TOPMARGIN, 7
BOTTOMMARGIN, 228
END
END
#endif // APSTUDIO_INVOKED

View File

@@ -1071,6 +1071,10 @@
RelativePath=".\GPURendererSW.cpp"
>
</File>
<File
RelativePath=".\GPUSettingsDlg.cpp"
>
</File>
<File
RelativePath=".\GPUState.cpp"
>
@@ -1617,6 +1621,10 @@
RelativePath=".\GPURendererSW.h"
>
</File>
<File
RelativePath=".\GPUSettingsDlg.h"
>
</File>
<File
RelativePath=".\GPUState.h"
>

View File

@@ -20,7 +20,9 @@
#define IDC_COMBO2 2015
#define IDC_COMBO5 2016
#define IDC_RADIO1 2017
#define IDC_COMBO6 2017
#define IDC_SPIN1 2018
#define IDC_COMBO7 2018
#define IDC_SPIN2 2019
#define IDD_CONFIG 2020
#define IDC_SPIN3 2020
@@ -31,6 +33,7 @@
#define IDC_LOGO10 2025
#define IDD_CAPTURE 2026
#define IDC_EDIT4 2027
#define IDD_GPUCONFIG 2027
#define IDR_CONVERT9_FX 10000
#define IDR_TFX9_FX 10001
#define IDR_MERGE9_FX 10002