This commit is contained in:
gabest
2009-01-31 01:16:24 +00:00
parent dfce193902
commit 1f509adde0
7 changed files with 304 additions and 196 deletions

View File

@@ -518,7 +518,7 @@ REG64_(GIFReg, ALPHA)
UINT32 FIX:8;
UINT32 _PAD2:24;
REG_END2
bool IsOpaque() const {return (A == B || C == 2 && FIX == 0) && D == 0 || (A == 0 && B == 2 && C == 2 && D == 2 && FIX == 0x80);} // output will be Cs/As
// Tests the A/B/C/D selector fields and FIX for the combinations this register
// treats as "opaque" (original note: output will be Cs/As).
// NOTE(review): the meaning of the selector values (e.g. that C == 2 picks FIX)
// is not visible here - confirm against the ALPHA register documentation.
__forceinline bool IsOpaque() const
{
	// First family: A and B are the same selector, or C == 2 with FIX == 0; D must be 0.
	const bool firstTermCancels = (A == B) || (C == 2 && FIX == 0);
	if(firstTermCancels && D == 0)
	{
		return true;
	}
	// Second family: the single explicit combination A=0, B=2, C=2, D=2 with FIX == 0x80.
	return A == 0 && B == 2 && C == 2 && D == 2 && FIX == 0x80;
}
REG_END2
REG64_(GIFReg, BITBLTBUF)

View File

@@ -1277,7 +1277,7 @@ public:
GSVector4i rm = m_rxxx;
GSVector4i gm = m_xgxx;
GSVector4i bm = m_xxbx;
GSVector4i am = m_xxxa;
// GSVector4i am = m_xxxa;
GSVector4i l, h;
if(TEXA.AEM)
@@ -1285,20 +1285,32 @@ public:
for(int i = 0; i < 8; i++, dst += dstpitch)
{
GSVector4i v0 = s[i * 2 + 0];
/*
l = v0.upl16();
h = v0.uph16();
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero());
*/
l = v0.upl16(v0);
h = v0.uph16(v0);
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
GSVector4i v1 = s[i * 2 + 1];
/*
l = v1.upl16();
h = v1.uph16();
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero());
*/
l = v1.upl16(v1);
h = v1.uph16(v1);
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
}
}
else
@@ -1306,20 +1318,32 @@ public:
for(int i = 0; i < 8; i++, dst += dstpitch)
{
GSVector4i v0 = s[i * 2 + 0];
/*
l = v0.upl16();
h = v0.uph16();
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am);
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am);
*/
l = v0.upl16(v0);
h = v0.uph16(v0);
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
GSVector4i v1 = s[i * 2 + 1];
/*
l = v1.upl16();
h = v1.uph16();
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am);
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am);
*/
l = v1.upl16(v1);
h = v1.uph16(v1);
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
}
}

View File

@@ -783,7 +783,7 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons
const GSVector4i rm = s_rm;
const GSVector4i gm = s_gm;
const GSVector4i bm = s_bm;
const GSVector4i am = s_am;
// const GSVector4i am = s_am;
GSVector4i TA0(TEXA.TA0 << 24);
GSVector4i TA1(TEXA.TA1 << 24);
@@ -798,10 +798,16 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons
for(int i = 0, j = w >> 3; i < j; i++)
{
c = s[i];
/*
cl = c.upl16();
ch = c.uph16();
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am);
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am);
*/
cl = c.upl16(c);
ch = c.uph16(c);
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15));
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15));
}
}
else
@@ -809,10 +815,16 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons
for(int i = 0, j = w >> 3; i < j; i++)
{
c = s[i];
/*
cl = c.upl16();
ch = c.uph16();
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am).andnot(cl == GSVector4i::zero());
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am).andnot(ch == GSVector4i::zero());
*/
cl = c.upl16(c);
ch = c.uph16(c);
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero());
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)).andnot(ch == GSVector4i::zero());
}
}

View File

@@ -21,7 +21,6 @@
// TODO:
// - if iip == 0 && tfx == TFX_NONE && fog == 0 && abe a/b/c != 1 => e = (a - b) * c, dst = e + d (simple addus8)
// - detect and convert quads to sprite
#include "StdAfx.h"
#include "GSDrawScanline.h"

View File

@@ -44,8 +44,6 @@ protected:
m_reset = true;
m_vtrace.Reset();
__super::Reset();
}
@@ -130,7 +128,7 @@ protected:
}
else if(context->TEST.ATST != ATST_ALWAYS)
{
GSVector4i af = GSVector4i(m_vtrace.min.c.wwww(m_vtrace.max.c)) >> 7;
GSVector4i af = GSVector4i(m_vtrace.m_min.c.wwww(m_vtrace.m_max.c)) >> 7;
int amin, amax;
@@ -297,7 +295,7 @@ protected:
{
p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM);
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.eq.rgba != 15)
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.m_eq.rgba != 15)
{
p.sel.iip = PRIM->IIP;
}
@@ -312,7 +310,7 @@ protected:
if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc)
{
if(m_vtrace.eq.rgba == 15 && (m_vtrace.min.c == GSVector4(128.0f * 128.0f)).alltrue())
if(m_vtrace.m_eq.rgba == 15 && (m_vtrace.m_min.c == GSVector4(128.0f * 128.0f)).alltrue())
{
// modulate does not do anything when vertex color is 0x80
@@ -331,7 +329,7 @@ protected:
GSVertexSW* v = m_vertices;
if(m_vtrace.eq.q)
if(m_vtrace.m_eq.q)
{
p.sel.fst = 1;
@@ -344,8 +342,8 @@ protected:
v[i].t *= w;
}
m_vtrace.min.t *= w;
m_vtrace.max.t *= w;
m_vtrace.m_min.t *= w;
m_vtrace.m_max.t *= w;
}
}
else if(primclass == GS_SPRITE_CLASS)
@@ -369,8 +367,8 @@ protected:
tmax = tmax.maxv(v0).maxv(v1);
}
m_vtrace.max.t = tmax;
m_vtrace.min.t = tmin;
m_vtrace.m_max.t = tmax;
m_vtrace.m_min.t = tmin;
}
}
@@ -389,8 +387,8 @@ protected:
v[i].t -= half;
}
m_vtrace.min.t -= half;
m_vtrace.max.t += half;
m_vtrace.m_min.t -= half;
m_vtrace.m_max.t += half;
}
}
/*
@@ -482,7 +480,7 @@ protected:
void Draw()
{
m_vtrace.Update();
m_vtrace.Update(m_vertices, m_count);
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM);
@@ -490,80 +488,80 @@ protected:
GetScanlineParam(p, primclass);
if((p.fm & p.zm) != 0xffffffff)
if((p.fm & p.zm) == 0xffffffff)
{
if(s_dump)
{
CString str;
str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
if(PRIM->TME) if(s_save) {m_mem.SaveBMP(str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);}
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy);
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
}
GSRasterizerData data;
data.scissor = GSVector4i(m_context->scissor.in);
data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.primclass = primclass;
data.vertices = m_vertices;
data.count = m_count;
data.param = &p;
m_rl.Draw(&data);
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
GSVector4i pos(m_vtrace.min.p.xyxy(m_vtrace.max.p));
GSVector4i scissor = data.scissor;
CRect r;
r.left = max(scissor.x, min(scissor.z, pos.x));
r.top = max(scissor.y, min(scissor.w, pos.y));
r.right = max(scissor.x, min(scissor.z, pos.z));
r.bottom = max(scissor.y, min(scissor.w, pos.w));
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.DBW = m_context->FRAME.FBW;
if(p.fm != 0xffffffff)
{
BITBLTBUF.DBP = m_context->FRAME.Block();
BITBLTBUF.DPSM = m_context->FRAME.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
if(p.zm != 0xffffffff)
{
BITBLTBUF.DBP = m_context->ZBUF.Block();
BITBLTBUF.DPSM = m_context->ZBUF.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
if(s_dump)
{
CString str;
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy);
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
}
return;
}
m_vtrace.Reset();
if(s_dump)
{
CString str;
str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
if(PRIM->TME) if(s_save) {m_mem.SaveBMP(str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);}
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy);
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
}
GSRasterizerData data;
data.scissor = GSVector4i(m_context->scissor.in);
data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.primclass = primclass;
data.vertices = m_vertices;
data.count = m_count;
data.param = &p;
m_rl.Draw(&data);
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
GSVector4i pos(m_vtrace.m_min.p.xyxy(m_vtrace.m_max.p));
GSVector4i scissor = data.scissor;
CRect r;
r.left = max(scissor.x, min(scissor.z, pos.x));
r.top = max(scissor.y, min(scissor.w, pos.y));
r.right = max(scissor.x, min(scissor.z, pos.z));
r.bottom = max(scissor.y, min(scissor.w, pos.w));
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.DBW = m_context->FRAME.FBW;
if(p.fm != 0xffffffff)
{
BITBLTBUF.DBP = m_context->FRAME.Block();
BITBLTBUF.DPSM = m_context->FRAME.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
if(p.zm != 0xffffffff)
{
BITBLTBUF.DBP = m_context->ZBUF.Block();
BITBLTBUF.DPSM = m_context->ZBUF.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
if(s_dump)
{
CString str;
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy);
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
}
}
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r)
@@ -623,7 +621,7 @@ protected:
if(fst)
{
GSVector4i uv = GSVector4i(m_vtrace.min.t.xyxy(m_vtrace.max.t)).sra32(16);
GSVector4i uv = GSVector4i(m_vtrace.m_min.t.xyxy(m_vtrace.m_max.t)).sra32(16);
/*
int tw = context->TEX0.TW;
int th = context->TEX0.TH;
@@ -686,59 +684,6 @@ protected:
r &= CRect(0, 0, w, h);
}
// Checks whether four vertices, picked out of v by the template indices i0..i3,
// pass a series of per-component equality tests on position, texture coordinate
// and color. Returns true only if every test passes; v is not modified.
// NOTE(review): i0 and i3 appear to play the role of the two diagonal corners
// (they must differ in every packed component), i1 and i2 the remaining two - confirm.
template<int i0, int i1, int i2, int i3>
static bool IsQuad(GSVertexSW* v)
{
// Pack two components of p with two components of t per vertex
// (presumably p.x, p.y, t.x, t.y - confirm against GSVector4::xyxy).
GSVector4 v0 = v[i0].p.xyxy(v[i0].t);
GSVector4 v1 = v[i1].p.xyxy(v[i1].t);
GSVector4 v2 = v[i2].p.xyxy(v[i2].t);
GSVector4 v3 = v[i3].p.xyxy(v[i3].t);
// Per-component equality masks of the i0 and i3 vertices against the other two.
GSVector4 v4 = v0 == v1;
GSVector4 v5 = v0 == v2;
GSVector4 v6 = v3 == v1;
GSVector4 v7 = v3 == v2;
// v1/v2 are reused as scratch masks from here on.
// v1: in each component exactly one of (i1, i2) matches i0, and exactly one matches i3.
v1 = (v4 ^ v5) & (v6 ^ v7);
// v2: the same exclusivity test with the second mask's halves swapped by zwxy(),
// cross-checking the first pair of components against the second pair.
v2 = (v4 ^ v5.zwxy()) & (v6 ^ v7.zwxy());
// Additionally i0 and i3 must differ in every packed component.
if((v1 & v2 & (v0 != v3)).alltrue())
{
// The remaining components (presumably p.z, p.w, t.z, t.w - confirm) must be
// identical across all four vertices.
v0 = v[i0].p.zwzw(v[i0].t);
v1 = v[i1].p.zwzw(v[i1].t);
v2 = v[i2].p.zwzw(v[i2].t);
v3 = v[i3].p.zwzw(v[i3].t);
if(((v0 == v1) & (v0 == v2) & (v0 == v3)).alltrue())
{
// All four vertices must also carry the same color.
v0 = v[i0].c;
v1 = v[i1].c;
v2 = v[i2].c;
v3 = v[i3].c;
if(((v0 == v1) & (v0 == v2) & (v0 == v3)).alltrue())
{
/*
printf("quad\n");
for(int i = 0; i < 6; i++)
{
printf("p = %.3f %.3f %.3f %.3f t = %.3f %.3f %.3f %.3f c = %.0f %.0f %.0f %.0f\n",
v[i].p.x, v[i].p.y, v[i].p.z, v[i].p.w,
v[i].t.x, v[i].t.y, v[i].t.z, v[i].t.w,
v[i].c.x / 128, v[i].c.y / 128, v[i].c.z / 128, v[i].c.w / 128);
}
*/
return true;
}
}
}
return false;
}
public:
GSRendererSW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, int threads)
: GSRendererT(base, mt, irq, nloophack, rs)
@@ -842,71 +787,72 @@ public:
return;
}
m_vtrace.min.p = m_vtrace.min.p.minv(pmin);
m_vtrace.max.p = m_vtrace.max.p.maxv(pmax);
switch(prim)
{
case GS_POINTLIST:
if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t);
if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t);
m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c);
m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c);
break;
case GS_LINELIST:
case GS_LINESTRIP:
if(PRIM->IIP == 0) {v[0].c = v[1].c;}
if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t);
if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t);
m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c).minv(v[1].c);
m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c).maxv(v[1].c);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;}
if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t.minv(v[2].t));
if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t.maxv(v[2].t));
m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c).minv(v[1].c.minv(v[2].c));
m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c).maxv(v[1].c.maxv(v[2].c));
break;
case GS_SPRITE:
if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t);
if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t);
m_vtrace.min.c = m_vtrace.min.c.minv(v[1].c);
m_vtrace.max.c = m_vtrace.max.c.maxv(v[1].c);
break;
}
m_count += count;
if(m_count == 6)
if(m_count >= 3 && m_count < 30)
{
GSVertexSW* v = &m_vertices[m_count - 3];
int tl = 0;
int br = 0;
bool isquad = false;
switch(prim)
{
case GS_TRIANGLESTRIP:
isquad = IsQuad<0, 1, 2, 5>(m_vertices);
break;
case GS_TRIANGLEFAN:
isquad = IsQuad<1, 0, 2, 5>(m_vertices);
case GS_TRIANGLELIST:
isquad = GSVertexSW::IsQuad(v, tl, br);
break;
// TODO: case GS_TRIANGLELIST:
}
if(isquad)
{
m_vertices[prim == GS_TRIANGLESTRIP ? 1 : 0] = m_vertices[5];
m_count -= 3;
if(m_count > 0)
{
tl += m_count;
br += m_count;
Flush();
}
if(tl != 0) m_vertices[0] = m_vertices[tl];
if(br != 1) m_vertices[1] = m_vertices[br];
m_count = 2;
UINT32 tmp = PRIM->PRIM;
PRIM->PRIM = GS_SPRITE;
Flush();
PRIM->PRIM = tmp;
m_count = 0;
m_perfmon.Put(GSPerfMon::Quad, 1);
return;
}
}
m_count += count;
}
}
};

View File

@@ -2341,6 +2341,11 @@ public:
return _mm_movemask_ps(m) == 0xf;
}
// Counterpart of alltrue(): true when none of the four lanes of m has its sign bit set.
bool allfalse() const
{
	// _mm_movemask_ps gathers the sign bit of each lane into the low four bits.
	const int laneSigns = _mm_movemask_ps(m);
	return laneSigns == 0;
}
// TODO: insert
template<int i> int extract() const

View File

@@ -25,10 +25,8 @@
__declspec(align(16)) union GSVertexSW
{
typedef GSVector4 Vector;
struct {Vector c, p, t;};
struct {Vector v[3];};
struct {GSVector4 c, p, t;};
struct {GSVector4 v[3];};
struct {float f[12];};
GSVertexSW() {}
@@ -39,10 +37,125 @@ __declspec(align(16)) union GSVertexSW
friend GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2);
friend GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2);
friend GSVertexSW operator * (const GSVertexSW& v, const Vector& vv);
friend GSVertexSW operator / (const GSVertexSW& v, const Vector& vv);
friend GSVertexSW operator * (const GSVertexSW& v, const GSVector4& vv);
friend GSVertexSW operator / (const GSVertexSW& v, const GSVector4& vv);
friend GSVertexSW operator * (const GSVertexSW& v, float f);
friend GSVertexSW operator / (const GSVertexSW& v, float f);
// Tests whether the six vertices v[0..5] (two triangles, v[0..2] and v[3..5])
// pass the equality checks below on position, texture coordinate and color.
//
// v  - pointer to at least six consecutive vertices; only read.
// tl - receives the index (0..2) of the first triangle's vertex chosen as "vtl".
// br - receives the index (3..5) of the second triangle's vertex that equals "vbr".
//
// Returns true only if every check passes. Both indices are relative to v.
// Note that tl is written once the first triangle passes, and br once the second
// triangle passes, so either may already be modified when false is returned.
static bool IsQuad(const GSVertexSW* v, int& tl, int& br)
{
// Pack two components of p with two components of t per vertex
// (presumably p.x, p.y, t.x, t.y - confirm against GSVector4::xyxy).
GSVector4 v0 = v[0].p.xyxy(v[0].t);
GSVector4 v1 = v[1].p.xyxy(v[1].t);
GSVector4 v2 = v[2].p.xyxy(v[2].t);
// Pairwise per-component equality masks for the first triangle.
GSVector4 v01 = v0 == v1;
GSVector4 v12 = v1 == v2;
GSVector4 v02 = v0 == v2;
GSVector4 vtl, vbr;
GSVector4 test;
int i;
// Find the pair of vertices that differ in every packed component; the remaining
// vertex becomes vtl. vbr is the sum of the other two minus vtl, i.e. the point
// that completes the parallelogram opposite vtl.
// test requires that, per component, vtl matches exactly one of the other two
// vertices; the zwxy() term repeats that check with the second mask's halves
// swapped, cross-checking the first pair of components against the second pair.
if(v12.allfalse())
{
test = (v01 ^ v02) & (v01 ^ v02.zwxy());
vtl = v0;
vbr = v1 + (v2 - v0);
i = 0;
}
else if(v02.allfalse())
{
test = (v01 ^ v12) & (v01 ^ v12.zwxy());
vtl = v1;
vbr = v0 + (v2 - v1);
i = 1;
}
else if(v01.allfalse())
{
test = (v02 ^ v12) & (v02 ^ v12.zwxy());
vtl = v2;
vbr = v0 + (v1 - v2);
i = 2;
}
else
{
// No pair of vertices differs in all components.
return false;
}
if(!test.alltrue())
{
return false;
}
// Written before the second triangle is validated (see header note).
tl = i;
// Same packing and pairwise masks for the second triangle.
GSVector4 v3 = v[3].p.xyxy(v[3].t);
GSVector4 v4 = v[4].p.xyxy(v[4].t);
GSVector4 v5 = v[5].p.xyxy(v[5].t);
GSVector4 v34 = v3 == v4;
GSVector4 v45 = v4 == v5;
GSVector4 v35 = v3 == v5;
// Mirror of the first-triangle search, with two extra conditions: the point
// completing this triangle's parallelogram must equal vtl, and the remaining
// vertex (whose index is stored in i) must equal vbr.
if(v34.allfalse())
{
test = (v35 ^ v45) & (v35 ^ v45.zwxy()) & (vtl == v3 + (v4 - v5)) & (vbr == v5);
i = 5;
}
else if(v35.allfalse())
{
test = (v34 ^ v45) & (v34 ^ v45.zwxy()) & (vtl == v3 + (v5 - v4)) & (vbr == v4);
i = 4;
}
else if(v45.allfalse())
{
test = (v34 ^ v35) & (v34 ^ v35.zwxy()) & (vtl == v5 + (v4 - v3)) & (vbr == v3);
i = 3;
}
else
{
return false;
}
if(!test.alltrue())
{
return false;
}
// Written before the remaining checks below (see header note).
br = i;
// The remaining components (presumably p.z, p.w, t.z, t.w - confirm against
// GSVector4::zwzw) must be identical across all six vertices.
v0 = v[0].p.zwzw(v[0].t);
v1 = v[1].p.zwzw(v[1].t);
v2 = v[2].p.zwzw(v[2].t);
v3 = v[3].p.zwzw(v[3].t);
v4 = v[4].p.zwzw(v[4].t);
v5 = v[5].p.zwzw(v[5].t);
test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5);
if(!test.alltrue())
{
return false;
}
// All six vertices must also share the same color.
v0 = v[0].c;
v1 = v[1].c;
v2 = v[2].c;
v3 = v[3].c;
v4 = v[4].c;
v5 = v[5].c;
test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5);
if(!test.alltrue())
{
return false;
}
return true;
}
};
__forceinline GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2)
@@ -63,7 +176,7 @@ __forceinline GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2)
return v0;
}
__forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVertexSW::Vector& vv)
__forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVector4& vv)
{
GSVertexSW v0;
v0.c = v.c * vv;
@@ -72,7 +185,7 @@ __forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVertexSW::Vect
return v0;
}
__forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVertexSW::Vector& vv)
__forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVector4& vv)
{
GSVertexSW v0;
v0.c = v.c / vv;
@@ -84,7 +197,7 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVertexSW::Vect
__forceinline GSVertexSW operator * (const GSVertexSW& v, float f)
{
GSVertexSW v0;
GSVertexSW::Vector vf(f);
GSVector4 vf(f);
v0.c = v.c * vf;
v0.p = v.p * vf;
v0.t = v.t * vf;
@@ -94,7 +207,7 @@ __forceinline GSVertexSW operator * (const GSVertexSW& v, float f)
__forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
{
GSVertexSW v0;
GSVertexSW::Vector vf(f);
GSVector4 vf(f);
v0.c = v.c / vf;
v0.p = v.p / vf;
v0.t = v.t / vf;
@@ -103,32 +216,41 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
__declspec(align(16)) struct GSVertexTrace
{
GSVertexSW min, max;
GSVertexSW m_min, m_max;
union
{
DWORD value;
struct {DWORD x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1, r:1, g:1, b:1, a:1;};
struct {DWORD xyzf:4, stq:4, rgba:4;};
} eq;
} m_eq;
GSVertexTrace()
void Update(const GSVertexSW* v, int count)
{
Reset();
}
GSVertexSW min, max;
void Update()
{
eq.value = (min.p == max.p).mask() | ((min.t == max.t).mask() << 4) | ((min.c == max.c).mask() << 8);
}
void Reset()
{
min.p = GSVector4(FLT_MAX);
max.p = GSVector4(-FLT_MAX);
min.t = GSVector4(FLT_MAX);
max.t = GSVector4(-FLT_MAX);
min.c = GSVector4(FLT_MAX);
max.c = GSVector4::zero();
for(int i = 0; i < count; i++)
{
min.p = min.p.minv(v[i].p);
max.p = max.p.maxv(v[i].p);
min.t = min.t.minv(v[i].t);
max.t = max.t.maxv(v[i].t);
min.c = min.c.minv(v[i].c);
max.c = max.c.maxv(v[i].c);
}
m_min = min;
m_max = max;
m_eq.value = (min.p == max.p).mask() | ((min.t == max.t).mask() << 4) | ((min.c == max.c).mask() << 8);
}
};