diff --git a/gsdx/GS.h b/gsdx/GS.h index cc9c3aa..6623688 100644 --- a/gsdx/GS.h +++ b/gsdx/GS.h @@ -518,7 +518,7 @@ REG64_(GIFReg, ALPHA) UINT32 FIX:8; UINT32 _PAD2:24; REG_END2 - bool IsOpaque() const {return (A == B || C == 2 && FIX == 0) && D == 0 || (A == 0 && B == 2 && C == 2 && D == 2 && FIX == 0x80);} // output will be Cs/As + __forceinline bool IsOpaque() const {return (A == B || C == 2 && FIX == 0) && D == 0 || (A == 0 && B == 2 && C == 2 && D == 2 && FIX == 0x80);} // output will be Cs/As REG_END2 REG64_(GIFReg, BITBLTBUF) diff --git a/gsdx/GSBlock.h b/gsdx/GSBlock.h index aba27d7..6885b07 100644 --- a/gsdx/GSBlock.h +++ b/gsdx/GSBlock.h @@ -1277,7 +1277,7 @@ public: GSVector4i rm = m_rxxx; GSVector4i gm = m_xgxx; GSVector4i bm = m_xxbx; - GSVector4i am = m_xxxa; + // GSVector4i am = m_xxxa; GSVector4i l, h; if(TEXA.AEM) @@ -1285,20 +1285,32 @@ public: for(int i = 0; i < 8; i++, dst += dstpitch) { GSVector4i v0 = s[i * 2 + 0]; - +/* l = v0.upl16(); h = v0.uph16(); ((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero()); ((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero()); +*/ + l = v0.upl16(v0); + h = v0.uph16(v0); + + ((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero()); + ((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero()); GSVector4i v1 = s[i * 2 + 1]; - +/* l = v1.upl16(); h = v1.uph16(); ((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero()); ((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero()); +*/ + l = v1.upl16(v1); + h = v1.uph16(v1); + + ((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero()); + ((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero()); } } else @@ -1306,20 +1318,32 @@ public: for(int i = 0; i < 8; i++, dst += dstpitch) { GSVector4i v0 = s[i * 2 + 0]; - +/* l = v0.upl16(); h = v0.uph16(); ((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am); ((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am); +*/ + l = v0.upl16(v0); + h = v0.uph16(v0); + + ((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15)); + ((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15)); GSVector4i v1 = s[i * 2 + 1]; - +/* l = v1.upl16(); h = v1.uph16(); ((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am); ((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am); +*/ + l = v1.upl16(v1); + h = v1.uph16(v1); + + ((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15)); + ((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15)); } } diff --git a/gsdx/GSClut.cpp b/gsdx/GSClut.cpp index 06b6f5f..7cbc883 100644 --- a/gsdx/GSClut.cpp +++ b/gsdx/GSClut.cpp @@ -783,7 +783,7 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons const GSVector4i rm = s_rm; const GSVector4i gm = s_gm; const GSVector4i bm = s_bm; - const GSVector4i am = s_am; + // const GSVector4i am = s_am; GSVector4i TA0(TEXA.TA0 << 24); GSVector4i TA1(TEXA.TA1 << 24); @@ -798,10 +798,16 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons for(int i = 0, j = w >> 3; i < j; i++) { c = s[i]; + /* cl = c.upl16(); ch = c.uph16(); d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am); d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am); + */ + cl = c.upl16(c); + ch = c.uph16(c); + d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)); + d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)); } } else @@ -809,10 +815,16 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons for(int i = 0, j = w >> 3; i < j; i++) { c = s[i]; + /* cl = c.upl16(); ch = c.uph16(); d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am).andnot(cl == GSVector4i::zero()); d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am).andnot(ch == GSVector4i::zero()); + */ + cl = c.upl16(c); + ch = c.uph16(c); + d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero()); + d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)).andnot(ch == GSVector4i::zero()); } } diff --git a/gsdx/GSDrawScanline.cpp b/gsdx/GSDrawScanline.cpp index 239e0ad..aa65d31 100644 --- a/gsdx/GSDrawScanline.cpp +++ b/gsdx/GSDrawScanline.cpp @@ -21,7 +21,6 @@ // TODO: // - if iip == 0 && tfx == TFX_NONE && fog == 0 && abe a/b/c != 1 => e = (a - b) * c, dst = e + d (simple addus8) -// - detect and convert quads to sprite #include "StdAfx.h" #include "GSDrawScanline.h" diff --git a/gsdx/GSRendererSW.h b/gsdx/GSRendererSW.h index f221fcb..b1bbd57 100644 --- a/gsdx/GSRendererSW.h +++ b/gsdx/GSRendererSW.h @@ -44,8 +44,6 @@ protected: m_reset = true; - m_vtrace.Reset(); - __super::Reset(); } @@ -130,7 +128,7 @@ protected: } else if(context->TEST.ATST != ATST_ALWAYS) { - GSVector4i af = GSVector4i(m_vtrace.min.c.wwww(m_vtrace.max.c)) >> 7; + GSVector4i af = GSVector4i(m_vtrace.m_min.c.wwww(m_vtrace.m_max.c)) >> 7; int amin, amax; @@ -297,7 +295,7 @@ protected: { p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM); - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.eq.rgba != 15) + if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.m_eq.rgba != 15) { p.sel.iip = PRIM->IIP; } @@ -312,7 +310,7 @@ protected: if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc) { - if(m_vtrace.eq.rgba == 15 && (m_vtrace.min.c == GSVector4(128.0f * 128.0f)).alltrue()) + if(m_vtrace.m_eq.rgba == 15 && (m_vtrace.m_min.c == GSVector4(128.0f * 128.0f)).alltrue()) { // modulate does not do anything when vertex color is 0x80 @@ -331,7 +329,7 @@ protected: GSVertexSW* v = m_vertices; - if(m_vtrace.eq.q) + if(m_vtrace.m_eq.q) { p.sel.fst = 1; @@ -344,8 +342,8 @@ protected: v[i].t *= w; } - m_vtrace.min.t *= w; - m_vtrace.max.t *= w; + m_vtrace.m_min.t *= w; + m_vtrace.m_max.t *= w; } } else if(primclass == GS_SPRITE_CLASS) @@ -369,8 +367,8 @@ protected: tmax = tmax.maxv(v0).maxv(v1); } - m_vtrace.max.t = tmax; - m_vtrace.min.t = tmin; + m_vtrace.m_max.t = tmax; + m_vtrace.m_min.t = tmin; } } @@ -389,8 +387,8 @@ protected: v[i].t -= half; } - m_vtrace.min.t -= half; - m_vtrace.max.t += half; + m_vtrace.m_min.t -= half; + m_vtrace.m_max.t += half; } } /* @@ -482,7 +480,7 @@ protected: void Draw() { - m_vtrace.Update(); + m_vtrace.Update(m_vertices, m_count); GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM); @@ -490,80 +488,80 @@ protected: GetScanlineParam(p, primclass); - if((p.fm & p.zm) != 0xffffffff) + if((p.fm & p.zm) == 0xffffffff) { - if(s_dump) - { - CString str; - str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); - if(PRIM->TME) if(s_save) {m_mem.SaveBMP(str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);} - str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); - if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy); - str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); - if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} - } - - GSRasterizerData data; - - data.scissor = GSVector4i(m_context->scissor.in); - data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour - data.primclass = primclass; - data.vertices = m_vertices; - data.count = m_count; - data.param = &p; - - m_rl.Draw(&data); - - GSRasterizerStats stats; - - m_rl.GetStats(stats); - - m_perfmon.Put(GSPerfMon::Draw, 1); - m_perfmon.Put(GSPerfMon::Prim, stats.prims); - m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); - - GSVector4i pos(m_vtrace.min.p.xyxy(m_vtrace.max.p)); - - GSVector4i scissor = data.scissor; - - CRect r; - - r.left = max(scissor.x, min(scissor.z, pos.x)); - r.top = max(scissor.y, min(scissor.w, pos.y)); - r.right = max(scissor.x, min(scissor.z, pos.z)); - r.bottom = max(scissor.y, min(scissor.w, pos.w)); - - GIFRegBITBLTBUF BITBLTBUF; - - BITBLTBUF.DBW = m_context->FRAME.FBW; - - if(p.fm != 0xffffffff) - { - BITBLTBUF.DBP = m_context->FRAME.Block(); - BITBLTBUF.DPSM = m_context->FRAME.PSM; - - m_tc->InvalidateVideoMem(BITBLTBUF, r); - } - - if(p.zm != 0xffffffff) - { - BITBLTBUF.DBP = m_context->ZBUF.Block(); - BITBLTBUF.DPSM = m_context->ZBUF.PSM; - - m_tc->InvalidateVideoMem(BITBLTBUF, r); - } - - if(s_dump) - { - CString str; - str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); - if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy); - str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); - if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} - } + return; } - m_vtrace.Reset(); + if(s_dump) + { + CString str; + str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); + if(PRIM->TME) if(s_save) {m_mem.SaveBMP(str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);} + str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); + if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy); + str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); + if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} + } + + GSRasterizerData data; + + data.scissor = GSVector4i(m_context->scissor.in); + data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour + data.primclass = primclass; + data.vertices = m_vertices; + data.count = m_count; + data.param = &p; + + m_rl.Draw(&data); + + GSRasterizerStats stats; + + m_rl.GetStats(stats); + + m_perfmon.Put(GSPerfMon::Draw, 1); + m_perfmon.Put(GSPerfMon::Prim, stats.prims); + m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); + + GSVector4i pos(m_vtrace.m_min.p.xyxy(m_vtrace.m_max.p)); + + GSVector4i scissor = data.scissor; + + CRect r; + + r.left = max(scissor.x, min(scissor.z, pos.x)); + r.top = max(scissor.y, min(scissor.w, pos.y)); + r.right = max(scissor.x, min(scissor.z, pos.z)); + r.bottom = max(scissor.y, min(scissor.w, pos.w)); + + GIFRegBITBLTBUF BITBLTBUF; + + BITBLTBUF.DBW = m_context->FRAME.FBW; + + if(p.fm != 0xffffffff) + { + BITBLTBUF.DBP = m_context->FRAME.Block(); + BITBLTBUF.DPSM = m_context->FRAME.PSM; + + m_tc->InvalidateVideoMem(BITBLTBUF, r); + } + + if(p.zm != 0xffffffff) + { + BITBLTBUF.DBP = m_context->ZBUF.Block(); + BITBLTBUF.DPSM = m_context->ZBUF.PSM; + + m_tc->InvalidateVideoMem(BITBLTBUF, r); + } + + if(s_dump) + { + CString str; + str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM); + if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy); + str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM); + if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);} + } } void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) @@ -623,7 +621,7 @@ protected: if(fst) { - GSVector4i uv = GSVector4i(m_vtrace.min.t.xyxy(m_vtrace.max.t)).sra32(16); + GSVector4i uv = GSVector4i(m_vtrace.m_min.t.xyxy(m_vtrace.m_max.t)).sra32(16); /* int tw = context->TEX0.TW; int th = context->TEX0.TH; @@ -686,59 +684,6 @@ protected: r &= CRect(0, 0, w, h); } - template - static bool IsQuad(GSVertexSW* v) - { - GSVector4 v0 = v[i0].p.xyxy(v[i0].t); - GSVector4 v1 = v[i1].p.xyxy(v[i1].t); - GSVector4 v2 = v[i2].p.xyxy(v[i2].t); - GSVector4 v3 = v[i3].p.xyxy(v[i3].t); - - GSVector4 v4 = v0 == v1; - GSVector4 v5 = v0 == v2; - GSVector4 v6 = v3 == v1; - GSVector4 v7 = v3 == v2; - - v1 = (v4 ^ v5) & (v6 ^ v7); - v2 = (v4 ^ v5.zwxy()) & (v6 ^ v7.zwxy()); - - if((v1 & v2 & (v0 != v3)).alltrue()) - { - v0 = v[i0].p.zwzw(v[i0].t); - v1 = v[i1].p.zwzw(v[i1].t); - v2 = v[i2].p.zwzw(v[i2].t); - v3 = v[i3].p.zwzw(v[i3].t); - - if(((v0 == v1) & (v0 == v2) & (v0 == v3)).alltrue()) - { - v0 = v[i0].c; - v1 = v[i1].c; - v2 = v[i2].c; - v3 = v[i3].c; - - if(((v0 == v1) & (v0 == v2) & (v0 == v3)).alltrue()) - { - /* - printf("quad\n"); - - for(int i = 0; i < 6; i++) - { - printf("p = %.3f %.3f %.3f %.3f t = %.3f %.3f %.3f %.3f c = %.0f %.0f %.0f %.0f\n", - v[i].p.x, v[i].p.y, v[i].p.z, v[i].p.w, - v[i].t.x, v[i].t.y, v[i].t.z, v[i].t.w, - v[i].c.x / 128, v[i].c.y / 128, v[i].c.z / 128, v[i].c.w / 128); - } - */ - - return true; - } - } - } - - return false; - } - - public: GSRendererSW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, int threads) : GSRendererT(base, mt, irq, nloophack, rs) @@ -842,71 +787,72 @@ public: return; } - m_vtrace.min.p = m_vtrace.min.p.minv(pmin); - m_vtrace.max.p = m_vtrace.max.p.maxv(pmax); - switch(prim) { case GS_POINTLIST: - if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t); - if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t); - m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c); - m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c); break; case GS_LINELIST: case GS_LINESTRIP: if(PRIM->IIP == 0) {v[0].c = v[1].c;} - if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t); - if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t); - m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c).minv(v[1].c); - m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c).maxv(v[1].c); break; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: case GS_TRIANGLEFAN: if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} - if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t.minv(v[2].t)); - if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t.maxv(v[2].t)); - m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c).minv(v[1].c.minv(v[2].c)); - m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c).maxv(v[1].c.maxv(v[2].c)); break; case GS_SPRITE: - if(tme) m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t); - if(tme) m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t); - m_vtrace.min.c = m_vtrace.min.c.minv(v[1].c); - m_vtrace.max.c = m_vtrace.max.c.maxv(v[1].c); break; } - m_count += count; - - if(m_count == 6) + if(m_count >= 3 && m_count < 30) { + GSVertexSW* v = &m_vertices[m_count - 3]; + + int tl = 0; + int br = 0; + bool isquad = false; switch(prim) { case GS_TRIANGLESTRIP: - isquad = IsQuad<0, 1, 2, 5>(m_vertices); - break; case GS_TRIANGLEFAN: - isquad = IsQuad<1, 0, 2, 5>(m_vertices); + case GS_TRIANGLELIST: + isquad = GSVertexSW::IsQuad(v, tl, br); break; - // TODO: case GS_TRIANGLELIST: } if(isquad) { - m_vertices[prim == GS_TRIANGLESTRIP ? 1 : 0] = m_vertices[5]; + m_count -= 3; + + if(m_count > 0) + { + tl += m_count; + br += m_count; + + Flush(); + } + + if(tl != 0) m_vertices[0] = m_vertices[tl]; + if(br != 1) m_vertices[1] = m_vertices[br]; + m_count = 2; + UINT32 tmp = PRIM->PRIM; PRIM->PRIM = GS_SPRITE; + Flush(); + PRIM->PRIM = tmp; - m_count = 0; + m_perfmon.Put(GSPerfMon::Quad, 1); + + return; } } + + m_count += count; } } }; diff --git a/gsdx/GSVector.h b/gsdx/GSVector.h index 9d7bcfc..a56111a 100644 --- a/gsdx/GSVector.h +++ b/gsdx/GSVector.h @@ -2341,6 +2341,11 @@ public: return _mm_movemask_ps(m) == 0xf; } + bool allfalse() const + { + return _mm_movemask_ps(m) == 0; + } + // TODO: insert template int extract() const diff --git a/gsdx/GSVertexSW.h b/gsdx/GSVertexSW.h index cdc11ef..8c1bd4b 100644 --- a/gsdx/GSVertexSW.h +++ b/gsdx/GSVertexSW.h @@ -25,10 +25,8 @@ __declspec(align(16)) union GSVertexSW { - typedef GSVector4 Vector; - - struct {Vector c, p, t;}; - struct {Vector v[3];}; + struct {GSVector4 c, p, t;}; + struct {GSVector4 v[3];}; struct {float f[12];}; GSVertexSW() {} @@ -39,10 +37,125 @@ __declspec(align(16)) union GSVertexSW friend GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2); friend GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2); - friend GSVertexSW operator * (const GSVertexSW& v, const Vector& vv); - friend GSVertexSW operator / (const GSVertexSW& v, const Vector& vv); + friend GSVertexSW operator * (const GSVertexSW& v, const GSVector4& vv); + friend GSVertexSW operator / (const GSVertexSW& v, const GSVector4& vv); friend GSVertexSW operator * (const GSVertexSW& v, float f); friend GSVertexSW operator / (const GSVertexSW& v, float f); + + static bool IsQuad(const GSVertexSW* v, int& tl, int& br) + { + GSVector4 v0 = v[0].p.xyxy(v[0].t); + GSVector4 v1 = v[1].p.xyxy(v[1].t); + GSVector4 v2 = v[2].p.xyxy(v[2].t); + + GSVector4 v01 = v0 == v1; + GSVector4 v12 = v1 == v2; + GSVector4 v02 = v0 == v2; + + GSVector4 vtl, vbr; + + GSVector4 test; + + int i; + + if(v12.allfalse()) + { + test = (v01 ^ v02) & (v01 ^ v02.zwxy()); + vtl = v0; + vbr = v1 + (v2 - v0); + i = 0; + } + else if(v02.allfalse()) + { + test = (v01 ^ v12) & (v01 ^ v12.zwxy()); + vtl = v1; + vbr = v0 + (v2 - v1); + i = 1; + } + else if(v01.allfalse()) + { + test = (v02 ^ v12) & (v02 ^ v12.zwxy()); + vtl = v2; + vbr = v0 + (v1 - v2); + i = 2; + } + else + { + return false; + } + + if(!test.alltrue()) + { + return false; + } + + tl = i; + + GSVector4 v3 = v[3].p.xyxy(v[3].t); + GSVector4 v4 = v[4].p.xyxy(v[4].t); + GSVector4 v5 = v[5].p.xyxy(v[5].t); + + GSVector4 v34 = v3 == v4; + GSVector4 v45 = v4 == v5; + GSVector4 v35 = v3 == v5; + + if(v34.allfalse()) + { + test = (v35 ^ v45) & (v35 ^ v45.zwxy()) & (vtl == v3 + (v4 - v5)) & (vbr == v5); + i = 5; + } + else if(v35.allfalse()) + { + test = (v34 ^ v45) & (v34 ^ v45.zwxy()) & (vtl == v3 + (v5 - v4)) & (vbr == v4); + i = 4; + } + else if(v45.allfalse()) + { + test = (v34 ^ v35) & (v34 ^ v35.zwxy()) & (vtl == v5 + (v4 - v3)) & (vbr == v3); + i = 3; + } + else + { + return false; + } + + if(!test.alltrue()) + { + return false; + } + + br = i; + + v0 = v[0].p.zwzw(v[0].t); + v1 = v[1].p.zwzw(v[1].t); + v2 = v[2].p.zwzw(v[2].t); + v3 = v[3].p.zwzw(v[3].t); + v4 = v[4].p.zwzw(v[4].t); + v5 = v[5].p.zwzw(v[5].t); + + test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); + + if(!test.alltrue()) + { + return false; + } + + v0 = v[0].c; + v1 = v[1].c; + v2 = v[2].c; + v3 = v[3].c; + v4 = v[4].c; + v5 = v[5].c; + + test = ((v0 == v1) & (v0 == v2)) & ((v0 == v3) & (v0 == v4)) & (v0 == v5); + + if(!test.alltrue()) + { + return false; + } + + return true; + } }; __forceinline GSVertexSW operator + (const GSVertexSW& v1, const GSVertexSW& v2) @@ -63,7 +176,7 @@ __forceinline GSVertexSW operator - (const GSVertexSW& v1, const GSVertexSW& v2) return v0; } -__forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVertexSW::Vector& vv) +__forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVector4& vv) { GSVertexSW v0; v0.c = v.c * vv; @@ -72,7 +185,7 @@ __forceinline GSVertexSW operator * (const GSVertexSW& v, const GSVertexSW::Vect return v0; } -__forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVertexSW::Vector& vv) +__forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVector4& vv) { GSVertexSW v0; v0.c = v.c / vv; @@ -84,7 +197,7 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, const GSVertexSW::Vect __forceinline GSVertexSW operator * (const GSVertexSW& v, float f) { GSVertexSW v0; - GSVertexSW::Vector vf(f); + GSVector4 vf(f); v0.c = v.c * vf; v0.p = v.p * vf; v0.t = v.t * vf; @@ -94,7 +207,7 @@ __forceinline GSVertexSW operator * (const GSVertexSW& v, float f) __forceinline GSVertexSW operator / (const GSVertexSW& v, float f) { GSVertexSW v0; - GSVertexSW::Vector vf(f); + GSVector4 vf(f); v0.c = v.c / vf; v0.p = v.p / vf; v0.t = v.t / vf; @@ -103,32 +216,41 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f) __declspec(align(16)) struct GSVertexTrace { - GSVertexSW min, max; + GSVertexSW m_min, m_max; union { DWORD value; struct {DWORD x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1, r:1, g:1, b:1, a:1;}; struct {DWORD xyzf:4, stq:4, rgba:4;}; - } eq; + } m_eq; - GSVertexTrace() + void Update(const GSVertexSW* v, int count) { - Reset(); - } + GSVertexSW min, max; - void Update() - { - eq.value = (min.p == max.p).mask() | ((min.t == max.t).mask() << 4) | ((min.c == max.c).mask() << 8); - } - - void Reset() - { min.p = GSVector4(FLT_MAX); max.p = GSVector4(-FLT_MAX); min.t = GSVector4(FLT_MAX); max.t = GSVector4(-FLT_MAX); min.c = GSVector4(FLT_MAX); max.c = GSVector4::zero(); + + for(int i = 0; i < count; i++) + { + min.p = min.p.minv(v[i].p); + max.p = max.p.maxv(v[i].p); + + min.t = min.t.minv(v[i].t); + max.t = max.t.maxv(v[i].t); + + min.c = min.c.minv(v[i].c); + max.c = max.c.maxv(v[i].c); + } + + m_min = min; + m_max = max; + + m_eq.value = (min.p == max.p).mask() | ((min.t == max.t).mask() << 4) | ((min.c == max.c).mask() << 8); } };