diff --git a/gsdx/GS.h b/gsdx/GS.h index 7b2b295..7f4a4f4 100644 --- a/gsdx/GS.h +++ b/gsdx/GS.h @@ -700,9 +700,7 @@ REG64_(GIFReg, PRIM) UINT32 FIX:1; UINT32 _PAD1:21; UINT32 _PAD2:32; -REG_END2 - bool TME_FST() {return (ai32[0] & 0x110) == 0x110;} -REG_END2 +REG_END REG64_(GIFReg, PRMODE) UINT32 _PRIM:3; diff --git a/gsdx/GSBlock.cpp b/gsdx/GSBlock.cpp index 605fef5..1a85542 100644 --- a/gsdx/GSBlock.cpp +++ b/gsdx/GSBlock.cpp @@ -26,7 +26,6 @@ const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11 const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); -const GSVector4i GSBlock::m_rgbx(0x00ffffff); const GSVector4i GSBlock::m_xxxa(0x00008000); const GSVector4i GSBlock::m_xxbx(0x00007c00); const GSVector4i GSBlock::m_xgxx(0x000003e0); diff --git a/gsdx/GSBlock.h b/gsdx/GSBlock.h index b7af49a..aba27d7 100644 --- a/gsdx/GSBlock.h +++ b/gsdx/GSBlock.h @@ -31,7 +31,6 @@ class GSBlock static const GSVector4i m_r8mask; static const GSVector4i m_r4mask; - static const GSVector4i m_rgbx; static const GSVector4i m_xxxa; static const GSVector4i m_xxbx; static const GSVector4i m_xgxx; @@ -1040,7 +1039,7 @@ public: { #if _M_SSE >= 0x200 - GSVector4i rgbx = m_rgbx; + GSVector4i mask = GSVector4i::x00ffffff(); for(int i = 0; i < 4; i++, src += srcpitch * 2) { @@ -1048,19 +1047,19 @@ public: GSVector4i v1 = GSVector4i::load(src + 16, src + srcpitch); GSVector4i v2 = GSVector4i::load(src + srcpitch + 8); - ((GSVector4i*)dst)[i * 4 + 0] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & rgbx; + ((GSVector4i*)dst)[i * 4 + 0] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & mask; v0 = v0.srl<12>(v1); - ((GSVector4i*)dst)[i * 4 + 1] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & rgbx; + ((GSVector4i*)dst)[i * 4 + 1] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & mask; v0 = v1.srl<8>(v2); - ((GSVector4i*)dst)[i * 4 + 2] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & rgbx; + ((GSVector4i*)dst)[i * 4 + 2] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & mask; v0 = v2.srl<4>(); - ((GSVector4i*)dst)[i * 4 + 3] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & rgbx; + ((GSVector4i*)dst)[i * 4 + 3] = v0.upl32(v0.srl<3>()).upl64(v0.srl<6>().upl32(v0.srl<9>())) & mask; } #else @@ -1215,7 +1214,7 @@ public: const GSVector4i* s = (const GSVector4i*)src; GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i mask = m_rgbx; + GSVector4i mask = GSVector4i::x00ffffff(); for(int i = 0; i < 4; i++, dst += dstpitch * 2) { @@ -1880,7 +1879,7 @@ public: const GSVector4i* s = (const GSVector4i*)src; GSVector4i TA0(TEXA.TA0 << 24); - GSVector4i mask = m_rgbx; + GSVector4i mask = GSVector4i::x00ffffff(); for(int i = 0; i < 4; i++, dst += dstpitch * 2) { diff --git a/gsdx/GSCrc.cpp b/gsdx/GSCrc.cpp index 6bfb457..96d37f5 100644 --- a/gsdx/GSCrc.cpp +++ b/gsdx/GSCrc.cpp @@ -51,10 +51,12 @@ CRC::Game CRC::m_games[] = {0xc1274668, FFXII, EU, false}, {0xdc2a467e, FFXII, EU, false}, {0xca284668, FFXII, EU, false}, + {0x280AD120, FFXII, JP, false}, {0x8BE3D7B2, ShadowHearts, Unknown, false}, {0xDEFA4763, ShadowHearts, US, false}, {0x21068223, Okami, US, false}, {0x891f223f, Okami, FR, false}, + {0xC5DEFEA0, Okami, JP, false}, {0x053D2239, MetalGearSolid3, US, false}, {0x086273D2, MetalGearSolid3, FR, false}, {0x26A6E286, MetalGearSolid3, EU, false}, @@ -62,6 +64,7 @@ CRC::Game CRC::m_games[] = {0x9F185CE1, MetalGearSolid3, Unknown, false}, {0x98D4BC93, MetalGearSolid3, EU, false}, {0x86BC3040, MetalGearSolid3, US, false}, //Subsistance disc 1 + {0x0481AD8A, MetalGearSolid3, JP, false}, {0x278722BF, DBZBT2, US, false}, {0xFE961D28, DBZBT2, US, false}, {0x0393B6BE, DBZBT2, EU, false}, @@ -78,7 +81,9 @@ CRC::Game CRC::m_games[] = {0x7D8F539A, SoTC, EU, false}, {0x3122B508, OnePieceGrandAdventure, US, false}, {0x6F8545DB, ICO, US, false}, + {0xB01A4C95, ICO, JP, false}, {0x5C991F4E, ICO, Unknown, false}, + {0xAEAD1CA3, GT4, JP, false}, {0x44A61C8F, GT4, Unknown, false}, {0x0086E35B, GT4, Unknown, false}, {0x77E61C8A, GT4, Unknown, false}, @@ -86,14 +91,17 @@ CRC::Game CRC::m_games[] = {0xC1640D2C, WildArms5, US, false}, {0x0FCF8FE4, WildArms5, EU, false}, {0x2294D322, WildArms5, JP, false}, + {0x565B6170, WildArms5, JP, false}, {0x8B029334, Manhunt2, Unknown, false}, {0x09F49E37, CrashBandicootWoC, Unknown, false}, {0x013E349D, ResidentEvil4, US, false}, {0x6BA2F6B9, ResidentEvil4, Unknown, false}, {0x60FA8C69, ResidentEvil4, JP, false}, {0x72E1E60E, Spartan, Unknown, false}, + {0x5ED8FB53, AceCombat4, JP, false}, {0x1B9B7563, AceCombat4, Unknown, false}, {0xEC432B24, Drakengard2, Unknown, false}, + {0xFC46EA61, Tekken5, JP, false}, {0x1F88EE37, Tekken5, Unknown, false}, {0x652050D2, Tekken5, Unknown, false}, {0x9E98B8AE, IkkiTousen, JP, false}, @@ -106,6 +114,7 @@ CRC::Game CRC::m_games[] = {0xf0a6d880, HarvestMoon, US, true}, {0x75c01a04, NamcoXCapcom, US, false}, {0xBF6F101F, GiTS, US, false}, + {0xA5768F53, GiTS, JP, false}, {0x6BF11378, Onimusha3, US, false}, {0xF442260C, MajokkoALaMode2, JP, false}, {0x14FE77F7, TalesOfAbyss, US, false}, diff --git a/gsdx/GSDrawScanline.cpp b/gsdx/GSDrawScanline.cpp index 22a85bb..ad9fd29 100644 --- a/gsdx/GSDrawScanline.cpp +++ b/gsdx/GSDrawScanline.cpp @@ -176,7 +176,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f) || m_env.sel.abe != 255 || m_env.sel.ztst > 1 || m_env.sel.atst > 1 - || m_env.sel.date) + || m_env.sel.date + || m_env.sel.fge) { f->sr = NULL; } @@ -308,6 +309,8 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc c = c.upl16(c.zwxy()); + if(!tme) c = c.srl16(7); + m_env.c.rb = c.xxxx(); m_env.c.ga = c.zzzz(); } @@ -371,20 +374,21 @@ void GSDrawScanline::SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, co GSVector4i rb10 = c10 & mask; GSVector4i rb11 = c11 & mask; + rb00 = rb00.lerp16<0>(rb01, uf); + rb10 = rb10.lerp16<0>(rb11, uf); + rb00 = rb00.lerp16<0>(rb10, vf); + + c[0] = rb00; + GSVector4i ga00 = (c00 >> 8) & mask; GSVector4i ga01 = (c01 >> 8) & mask; GSVector4i ga10 = (c10 >> 8) & mask; GSVector4i ga11 = (c11 >> 8) & mask; - rb00 = rb00.lerp16<0>(rb01, uf); - rb10 = rb10.lerp16<0>(rb11, uf); - rb00 = rb00.lerp16<0>(rb10, vf); - ga00 = ga00.lerp16<0>(ga01, uf); ga10 = ga10.lerp16<0>(ga11, uf); ga00 = ga00.lerp16<0>(ga10, vf); - c[0] = rb00; c[1] = ga00; } else @@ -409,49 +413,54 @@ void GSDrawScanline::SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, co } } -void GSDrawScanline::ColorTFX(DWORD tfx, const GSVector4i& rbf, const GSVector4i& gaf, GSVector4i& rbt, GSVector4i& gat) +void GSDrawScanline::ColorTFX(DWORD iip, DWORD tfx, const GSVector4i& rbf, const GSVector4i& gaf, GSVector4i& rbt, GSVector4i& gat) { + GSVector4i rb = iip ? rbf : m_env.c.rb; + GSVector4i ga = iip ? gaf : m_env.c.ga; + GSVector4i af; switch(tfx) { case TFX_MODULATE: - rbt = rbt.modulate16<1>(rbf).clamp8(); + rbt = rbt.modulate16<1>(rb).clamp8(); break; case TFX_DECAL: break; case TFX_HIGHLIGHT: case TFX_HIGHLIGHT2: - af = gaf.yywwlh().srl16(7); - rbt = rbt.modulate16<1>(rbf).add16(af).clamp8(); - gat = gat.modulate16<1>(gaf).add16(af).clamp8().mix16(gat); + af = ga.yywwlh().srl16(7); + rbt = rbt.modulate16<1>(rb).add16(af).clamp8(); + gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat); break; case TFX_NONE: - rbt = rbf.srl16(7); + rbt = iip ? rb.srl16(7) : rb; break; default: __assume(0); } } -void GSDrawScanline::AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVector4i& gat) +void GSDrawScanline::AlphaTFX(DWORD iip, DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVector4i& gat) { + GSVector4i ga = iip ? gaf : m_env.c.ga; + switch(tfx) { case TFX_MODULATE: - gat = gat.modulate16<1>(gaf).clamp8(); // mul16hrs rounds and breaks fogging in resident evil 4 (only modulate16<0> uses mul16hrs, but watch out) - if(!tcc) gat = gat.mix16(gaf.srl16(7)); + gat = gat.modulate16<1>(ga).clamp8(); // mul16hrs rounds and breaks fogging in resident evil 4 (only modulate16<0> uses mul16hrs, but watch out) + if(!tcc) gat = gat.mix16(ga.srl16(7)); break; case TFX_DECAL: break; case TFX_HIGHLIGHT: - gat = gat.mix16(!tcc ? gaf.srl16(7) : gat.addus8(gaf.srl16(7))); + gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7))); break; case TFX_HIGHLIGHT2: - if(!tcc) gat = gat.mix16(gaf.srl16(7)); + if(!tcc) gat = gat.mix16(ga.srl16(7)); break; case TFX_NONE: - gat = gaf.srl16(7); + gat = iip ? ga.srl16(7) : ga; break; default: __assume(0); @@ -716,11 +725,6 @@ void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW rb = vc.xxxx().add16(m_env.d[skip].rb); ga = vc.zzzz().add16(m_env.d[skip].ga); } - else - { - rb = m_env.c.rb; - ga = m_env.c.ga; - } // @@ -772,7 +776,7 @@ void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW SampleTexture(m_env.sel.ltf, m_env.sel.tlu, u, v, c); } - AlphaTFX(m_env.sel.tfx, m_env.sel.tcc, ga, c[1]); + AlphaTFX(iip, m_env.sel.tfx, m_env.sel.tcc, ga, c[1]); GSVector4i fm = m_env.fm; GSVector4i zm = m_env.zm; @@ -782,7 +786,7 @@ void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW continue; } - ColorTFX(m_env.sel.tfx, rb, ga, c[0], c[1]); + ColorTFX(iip, m_env.sel.tfx, rb, ga, c[0], c[1]); Fog(m_env.sel.fge, f, c[0], c[1]); @@ -975,11 +979,6 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex rb = vc.xxxx().add16(m_env.d[skip].rb); ga = vc.zzzz().add16(m_env.d[skip].ga); } - else - { - rb = m_env.c.rb; - ga = m_env.c.ga; - } // @@ -1031,7 +1030,7 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex SampleTexture(ltf, tlu, u, v, c); } - AlphaTFX(tfx, tcc, ga, c[1]); + AlphaTFX(iip, tfx, tcc, ga, c[1]); GSVector4i fm = m_env.fm; GSVector4i zm = m_env.zm; @@ -1041,7 +1040,7 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex continue; } - ColorTFX(tfx, rb, ga, c[0], c[1]); + ColorTFX(iip, tfx, rb, ga, c[0], c[1]); Fog(fge, f, c[0], c[1]); @@ -1091,7 +1090,7 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex rb = c[abea * 2 + 0]; ga = c[abea * 2 + 1]; - if(abeb != 2) + if(abeb < 2) { rb = rb.sub16(c[abeb * 2 + 0]); ga = ga.sub16(c[abeb * 2 + 1]); diff --git a/gsdx/GSDrawScanline.h b/gsdx/GSDrawScanline.h index f8b8035..d2a491d 100644 --- a/gsdx/GSDrawScanline.h +++ b/gsdx/GSDrawScanline.h @@ -153,8 +153,8 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline __forceinline GSVector4i Wrap(const GSVector4i& t); __forceinline void SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, const GSVector4i& v, GSVector4i* c); - __forceinline void ColorTFX(DWORD tfx, const GSVector4i& rbf, const GSVector4i& gaf, GSVector4i& rbt, GSVector4i& gat); - __forceinline void AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVector4i& gat); + __forceinline void ColorTFX(DWORD iip, DWORD tfx, const GSVector4i& rbf, const GSVector4i& gaf, GSVector4i& rbt, GSVector4i& gat); + __forceinline void AlphaTFX(DWORD iip, DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVector4i& gat); __forceinline void Fog(DWORD fge, const GSVector4i& f, GSVector4i& rb, GSVector4i& ga); __forceinline bool TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& zd, GSVector4i& test); __forceinline bool TestAlpha(DWORD atst, DWORD afail, const GSVector4i& ga, GSVector4i& fm, GSVector4i& zm, GSVector4i& test); diff --git a/gsdx/GSRenderer.h b/gsdx/GSRenderer.h index 2cd876c..b02cd9b 100644 --- a/gsdx/GSRenderer.h +++ b/gsdx/GSRenderer.h @@ -456,108 +456,6 @@ protected: __super::Reset(); } - void VertexKick(bool skip) - { - if(m_vl.GetCount() < m_vprim) - { - return; - } - - if(m_count > m_maxcount) - { - m_maxcount = max(10000, m_maxcount * 3/2); - m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 16); - m_maxcount -= 100; - } - - Vertex* v = &m_vertices[m_count]; - - int count = 0; - - switch(PRIM->PRIM) - { - case GS_POINTLIST: - m_vl.GetAt(0, v[0]); - m_vl.RemoveAll(); - count = 1; - break; - case GS_LINELIST: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAll(); - count = 2; - break; - case GS_LINESTRIP: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAt(0, 1); - count = 2; - break; - case GS_TRIANGLELIST: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAll(); - count = 3; - break; - case GS_TRIANGLESTRIP: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAt(0, 2); - count = 3; - break; - case GS_TRIANGLEFAN: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.GetAt(2, v[2]); - m_vl.RemoveAt(1, 1); - count = 3; - break; - case GS_SPRITE: - m_vl.GetAt(0, v[0]); - m_vl.GetAt(1, v[1]); - m_vl.RemoveAll(); - count = 2; - break; - case GS_INVALID: - ASSERT(0); - m_vl.RemoveAll(); - return; - default: - __assume(0); - } - - if(!skip) - { - (this->*m_fpDrawingKickHandlers[PRIM->PRIM])(v, count); - - m_count += count; -/* - if(m_context->TEST.DATE) - { - Flush(); - } - - if(m_env.COLCLAMP.CLAMP == 0) - { - Flush(); - } -*/ - } - } - - typedef void (GSRendererT::*DrawingKickHandler)(Vertex* v, int& count); - - DrawingKickHandler m_fpDrawingKickHandlers[8]; - - void DrawingKickNull(Vertex* v, int& count) - { - ASSERT(0); - } - - virtual void Draw() = 0; - void ResetPrim() { m_vl.RemoveAll(); @@ -582,43 +480,140 @@ protected: PRIM->TME ? (int)m_context->TEX0.PSM : 0xff); */ - /* - static bool first = true; - static GIFRegPRIM s_PRIM; - static GIFRegFRAME s_FRAME; - static GIFRegTEX0 s_TEX0; - - if(first || PRIM->TME != s_PRIM.TME || m_context->FRAME.i64 != s_FRAME.i64 || m_context->TEX0.i64 != s_TEX0.i64) - { - first = false; - s_PRIM = *PRIM; - s_FRAME = m_context->FRAME; - s_TEX0 = m_context->TEX0; - printf("%05x %2d", s_FRAME.Block(), s_FRAME.PSM); - if(s_PRIM.TME) printf(" %05x %2d %05x %2d", s_TEX0.TBP0, s_TEX0.PSM, s_TEX0.CBP, s_TEX0.CPSM); - printf("\n"); - } - */ - Draw(); m_count = 0; } } + void GrowVertexBuffer() + { + m_maxcount = max(10000, m_maxcount * 3/2); + m_vertices = (Vertex*)_aligned_realloc(m_vertices, sizeof(Vertex) * m_maxcount, 16); + m_maxcount -= 100; + } + + template void VertexKick(bool skip) + { + (this->*m_fpAddVertexHandlers[PRIM->TME][PRIM->FST])(); + + DWORD count = 0; + + switch(prim) + { + case GS_POINTLIST: count = 1; break; + case GS_LINELIST: count = 2; break; + case GS_LINESTRIP: count = 2; break; + case GS_TRIANGLELIST: count = 3; break; + case GS_TRIANGLESTRIP: count = 3; break; + case GS_TRIANGLEFAN: count = 3; break; + case GS_SPRITE: count = 2; break; + case GS_INVALID: count = 1; break; + default: __assume(0); + } + + if(m_vl.GetCount() < count) + { + return; + } + + if(m_count >= m_maxcount) + { + GrowVertexBuffer(); + } + + Vertex* v = &m_vertices[m_count]; + + switch(prim) + { + case GS_POINTLIST: + m_vl.GetAt(0, v[0]); + m_vl.RemoveAll(); + break; + case GS_LINELIST: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.RemoveAll(); + break; + case GS_LINESTRIP: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.RemoveAt(0, 1); + break; + case GS_TRIANGLELIST: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.GetAt(2, v[2]); + m_vl.RemoveAll(); + break; + case GS_TRIANGLESTRIP: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.GetAt(2, v[2]); + m_vl.RemoveAt(0, 2); + break; + case GS_TRIANGLEFAN: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.GetAt(2, v[2]); + m_vl.RemoveAt(1, 1); + break; + case GS_SPRITE: + m_vl.GetAt(0, v[0]); + m_vl.GetAt(1, v[1]); + m_vl.RemoveAll(); + break; + case GS_INVALID: + ASSERT(0); + m_vl.RemoveAll(); + return; + default: + __assume(0); + } + + if(!skip) + { + (this->*m_fpAddPrimHandlers[prim])(v, count); + + m_count += count; + } + } + + virtual void Draw() = 0; + + typedef void (GSRendererT::*AddVertexHandler)(); + typedef void (GSRendererT::*AddPrimHandler)(Vertex* v, DWORD& count); + + AddVertexHandler m_fpAddVertexHandlers[2][2]; + AddPrimHandler m_fpAddPrimHandlers[8]; + + void AddVertexNull() {ASSERT(0);} + void AddPrimNull(Vertex* v, DWORD& count) {ASSERT(0);} + public: GSRendererT(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, bool psrr = true) : GSRenderer(base, mt, irq, nloophack, rs, psrr) , m_count(0) - , m_maxcount(10000) + , m_maxcount(0) , m_vertices(NULL) { - m_vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 16); - m_maxcount -= 100; + m_fpVertexKickHandlers[GS_POINTLIST] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_LINELIST] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_LINESTRIP] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_TRIANGLELIST] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_TRIANGLESTRIP] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_TRIANGLEFAN] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_SPRITE] = (VertexKickHandler)&GSRendererT::VertexKick; + m_fpVertexKickHandlers[GS_INVALID] = (VertexKickHandler)&GSRendererT::VertexKick; - for(int i = 0; i < countof(m_fpDrawingKickHandlers); i++) + m_fpAddVertexHandlers[0][0] = &GSRendererT::AddVertexNull; + m_fpAddVertexHandlers[0][1] = &GSRendererT::AddVertexNull; + m_fpAddVertexHandlers[1][0] = &GSRendererT::AddVertexNull; + m_fpAddVertexHandlers[1][1] = &GSRendererT::AddVertexNull; + + for(int i = 0; i < countof(m_fpAddPrimHandlers); i++) { - m_fpDrawingKickHandlers[i] = &GSRendererT::DrawingKickNull; + m_fpAddPrimHandlers[i] = &GSRendererT::AddPrimNull; } } diff --git a/gsdx/GSRendererHW10.cpp b/gsdx/GSRendererHW10.cpp index 2e0ec4c..6dc61bf 100644 --- a/gsdx/GSRendererHW10.cpp +++ b/gsdx/GSRendererHW10.cpp @@ -27,23 +27,18 @@ GSRendererHW10::GSRendererHW10(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) : GSRendererHW(base, mt, irq, nloophack, rs, true) { - for(int i = 0; i < countof(m_fpDrawingKickHandlers); i++) - { - m_fpDrawingKickHandlers[i] = (DrawingKickHandler)&GSRendererHW10::DrawingKick; - } + m_fpAddVertexHandlers[0][0] = (AddVertexHandler)&GSRendererHW10::AddVertex<0, 0>; + m_fpAddVertexHandlers[0][1] = (AddVertexHandler)&GSRendererHW10::AddVertex<0, 0>; + m_fpAddVertexHandlers[1][0] = (AddVertexHandler)&GSRendererHW10::AddVertex<1, 0>; + m_fpAddVertexHandlers[1][1] = (AddVertexHandler)&GSRendererHW10::AddVertex<1, 1>; - m_fpDrawingKickHandlers[GS_POINTLIST] = (DrawingKickHandler)&GSRendererHW10::DrawingKickPoint; - - #if _M_SSE >= 0x401 - - m_fpDrawingKickHandlers[GS_LINELIST] = (DrawingKickHandler)&GSRendererHW10::DrawingKickLine; - m_fpDrawingKickHandlers[GS_LINESTRIP] = (DrawingKickHandler)&GSRendererHW10::DrawingKickLine; - m_fpDrawingKickHandlers[GS_TRIANGLELIST] = (DrawingKickHandler)&GSRendererHW10::DrawingKickTriangle; - m_fpDrawingKickHandlers[GS_TRIANGLESTRIP] = (DrawingKickHandler)&GSRendererHW10::DrawingKickTriangle; - m_fpDrawingKickHandlers[GS_TRIANGLEFAN] = (DrawingKickHandler)&GSRendererHW10::DrawingKickTriangle; - m_fpDrawingKickHandlers[GS_SPRITE] = (DrawingKickHandler)&GSRendererHW10::DrawingKickSprite; - - #endif + m_fpAddPrimHandlers[GS_POINTLIST] = (AddPrimHandler)&GSRendererHW10::AddPrim; + m_fpAddPrimHandlers[GS_LINELIST] = (AddPrimHandler)&GSRendererHW10::AddPrim; + m_fpAddPrimHandlers[GS_LINESTRIP] = (AddPrimHandler)&GSRendererHW10::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLELIST] = (AddPrimHandler)&GSRendererHW10::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLESTRIP] = (AddPrimHandler)&GSRendererHW10::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLEFAN] = (AddPrimHandler)&GSRendererHW10::AddPrim; + m_fpAddPrimHandlers[GS_SPRITE] = (AddPrimHandler)&GSRendererHW10::AddPrim; } bool GSRendererHW10::Create(LPCTSTR title) @@ -86,112 +81,76 @@ bool GSRendererHW10::Create(LPCTSTR title) return true; } -void GSRendererHW10::VertexKick(bool skip) +template +void GSRendererHW10::AddVertex() { - Vertex& v = m_vl.AddTail(); + Vertex& dst = m_vl.AddTail(); - v.m128i[0] = m_v.m128i[0]; - v.m128i[1] = m_v.m128i[1]; + dst.m128i[0] = m_v.m128i[0]; + dst.m128i[1] = m_v.m128i[1]; - if(PRIM->TME_FST()) + if(tme && fst) { - GSVector4::storel(&v.ST, m_v.GetUV()); + GSVector4::storel(&dst.ST, m_v.GetUV()); } - - __super::VertexKick(skip); } -int GSRendererHW10::ScissorTest(const GSVector4i& p0, const GSVector4i& p1) +template +void GSRendererHW10::AddPrim(Vertex* v, DWORD& count) { GSVector4i scissor = m_context->scissor.dx10; - GSVector4i v0 = p0 < scissor; - GSVector4i v1 = p1 > scissor.zwxy(); + #if _M_SSE >= 0x401 - return (v0 | v1).mask() & 0xff; -} + GSVector4i pmin, pmax, v0, v1, v2; -void GSRendererHW10::DrawingKickPoint(Vertex* v, int& count) -{ - GSVector4i v0 = GSVector4i::load((int)v[0].p.xy).upl16(); + switch(primclass) + { + case GS_POINT_CLASS: + v0 = GSVector4i::load((int)v[0].p.xy).upl16(); + pmin = v0; + pmax = v0; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + v0 = GSVector4i::load((int)v[0].p.xy); + v1 = GSVector4i::load((int)v[1].p.xy); + pmin = v0.min_u16(v1).upl16(); + pmax = v0.max_u16(v1).upl16(); + break; + case GS_TRIANGLE_CLASS: + v0 = GSVector4i::load((int)v[0].p.xy); + v1 = GSVector4i::load((int)v[1].p.xy); + v2 = GSVector4i::load((int)v[2].p.xy); + pmin = v0.min_u16(v1).min_u16(v2).upl16(); + pmax = v0.max_u16(v1).max_u16(v2).upl16(); + break; + } - GSVector4i p0 = v0; - GSVector4i p1 = v0; + GSVector4i test = (pmax < scissor) | (pmin > scissor.zwxy()); - if(ScissorTest(p0, p1)) + if(test.mask() & 0xff) { count = 0; return; } -} -#if _M_SSE >= 0x401 + #else -void GSRendererHW10::DrawingKickLine(Vertex* v, int& count) -{ - GSVector4i v0 = GSVector4i::load((int)v[0].p.xy); - GSVector4i v1 = GSVector4i::load((int)v[1].p.xy); - - GSVector4i p0 = v0.max_u16(v1).upl16(); - GSVector4i p1 = v0.min_u16(v1).upl16(); - - if(ScissorTest(p0, p1)) + switch(primclass) { - count = 0; - return; - } -} - -void GSRendererHW10::DrawingKickTriangle(Vertex* v, int& count) -{ - GSVector4i v0 = GSVector4i::load((int)v[0].p.xy); - GSVector4i v1 = GSVector4i::load((int)v[1].p.xy); - GSVector4i v2 = GSVector4i::load((int)v[2].p.xy); - - GSVector4i p0 = v0.max_u16(v1).max_u16(v2).upl16(); - GSVector4i p1 = v0.min_u16(v1).min_u16(v2).upl16(); - - if(ScissorTest(p0, p1)) - { - count = 0; - return; - } -} - -void GSRendererHW10::DrawingKickSprite(Vertex* v, int& count) -{ - GSVector4i v0 = GSVector4i::load((int)v[0].p.xy); - GSVector4i v1 = GSVector4i::load((int)v[1].p.xy); - - GSVector4i p0 = v0.max_u16(v1).upl16(); - GSVector4i p1 = v0.min_u16(v1).upl16(); - - if(ScissorTest(p0, p1)) - { - count = 0; - return; - } -} - -#endif - -void GSRendererHW10::DrawingKick(Vertex* v, int& count) -{ - GSVector4i scissor = m_context->scissor.dx10; - - switch(count) - { - case 1: - if(v[0].p.x < scissor.x + case GS_POINT_CLASS: + if(v[0].p.x < scissor.x || v[0].p.x > scissor.z - || v[0].p.y < scissor.y + || v[0].p.y < scissor.y || v[0].p.y > scissor.w) { count = 0; return; } break; - case 2: + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: if(v[0].p.x < scissor.x && v[1].p.x < scissor.x || v[0].p.x > scissor.z && v[1].p.x > scissor.z || v[0].p.y < scissor.y && v[1].p.y < scissor.y @@ -201,7 +160,7 @@ void GSRendererHW10::DrawingKick(Vertex* v, int& count) return; } break; - case 3: + case GS_TRIANGLE_CLASS: if(v[0].p.x < scissor.x && v[1].p.x < scissor.x && v[2].p.x < scissor.x || v[0].p.x > scissor.z && v[1].p.x > scissor.z && v[2].p.x > scissor.z || v[0].p.y < scissor.y && v[1].p.y < scissor.y && v[2].p.y < scissor.y @@ -211,9 +170,9 @@ void GSRendererHW10::DrawingKick(Vertex* v, int& count) return; } break; - default: - __assume(0); } + + #endif } void GSRendererHW10::Draw(int prim, Texture& rt, Texture& ds, GSTextureCache::GSTexture* tex) diff --git a/gsdx/GSRendererHW10.h b/gsdx/GSRendererHW10.h index 4eee5be..b2aa13e 100644 --- a/gsdx/GSRendererHW10.h +++ b/gsdx/GSRendererHW10.h @@ -35,22 +35,14 @@ class GSRendererHW10 : public GSRendererHW + void AddVertex(); + + template + void AddPrim(Vertex* v, DWORD& count); + bool WrapZ(DWORD maxz); - __forceinline int ScissorTest(const GSVector4i& p0, const GSVector4i& p1); - - void DrawingKickPoint(Vertex* v, int& count); - - #if _M_SSE >= 0x401 - - void DrawingKickLine(Vertex* v, int& count); - void DrawingKickTriangle(Vertex* v, int& count); - void DrawingKickSprite(Vertex* v, int& count); - - #endif - void Draw(int prim, Texture& rt, Texture& ds, GSTextureCache::GSTexture* tex); struct diff --git a/gsdx/GSRendererHW9.cpp b/gsdx/GSRendererHW9.cpp index e7dad6c..709d051 100644 --- a/gsdx/GSRendererHW9.cpp +++ b/gsdx/GSRendererHW9.cpp @@ -30,13 +30,18 @@ GSRendererHW9::GSRendererHW9(BYTE* base, bool mt, void (*irq)(), int nloophack, m_fba.enabled = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("fba"), TRUE); m_logz = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("logz"), FALSE); - m_fpDrawingKickHandlers[GS_POINTLIST] = (DrawingKickHandler)&GSRendererHW9::DrawingKickPoint; - m_fpDrawingKickHandlers[GS_LINELIST] = (DrawingKickHandler)&GSRendererHW9::DrawingKickLine; - m_fpDrawingKickHandlers[GS_LINESTRIP] = (DrawingKickHandler)&GSRendererHW9::DrawingKickLine; - m_fpDrawingKickHandlers[GS_TRIANGLELIST] = (DrawingKickHandler)&GSRendererHW9::DrawingKickTriangle; - m_fpDrawingKickHandlers[GS_TRIANGLESTRIP] = (DrawingKickHandler)&GSRendererHW9::DrawingKickTriangle; - m_fpDrawingKickHandlers[GS_TRIANGLEFAN] = (DrawingKickHandler)&GSRendererHW9::DrawingKickTriangle; - m_fpDrawingKickHandlers[GS_SPRITE] = (DrawingKickHandler)&GSRendererHW9::DrawingKickSprite; + m_fpAddVertexHandlers[0][0] = (AddVertexHandler)&GSRendererHW9::AddVertex<0, 0>; + m_fpAddVertexHandlers[0][1] = (AddVertexHandler)&GSRendererHW9::AddVertex<0, 0>; + m_fpAddVertexHandlers[1][0] = (AddVertexHandler)&GSRendererHW9::AddVertex<1, 0>; + m_fpAddVertexHandlers[1][1] = (AddVertexHandler)&GSRendererHW9::AddVertex<1, 1>; + + m_fpAddPrimHandlers[GS_POINTLIST] = (AddPrimHandler)&GSRendererHW9::AddPrim; + m_fpAddPrimHandlers[GS_LINELIST] = (AddPrimHandler)&GSRendererHW9::AddPrim; + m_fpAddPrimHandlers[GS_LINESTRIP] = (AddPrimHandler)&GSRendererHW9::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLELIST] = (AddPrimHandler)&GSRendererHW9::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLESTRIP] = (AddPrimHandler)&GSRendererHW9::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLEFAN] = (AddPrimHandler)&GSRendererHW9::AddPrim; + m_fpAddPrimHandlers[GS_SPRITE] = (AddPrimHandler)&GSRendererHW9::AddPrim; } bool GSRendererHW9::Create(LPCTSTR title) @@ -80,123 +85,93 @@ bool GSRendererHW9::Create(LPCTSTR title) return true; } -void GSRendererHW9::VertexKick(bool skip) +template +void GSRendererHW9::AddVertex() { - Vertex& v = m_vl.AddTail(); + Vertex& dst = m_vl.AddTail(); - v.p.x = (float)m_v.XYZ.X; - v.p.y = (float)m_v.XYZ.Y; - v.p.z = (float)m_v.XYZ.Z; + dst.p.x = (float)m_v.XYZ.X; + dst.p.y = (float)m_v.XYZ.Y; + dst.p.z = (float)m_v.XYZ.Z; - v.c0 = m_v.RGBAQ.ai32[0]; - v.c1 = m_v.FOG.ai32[1]; + dst.c0 = m_v.RGBAQ.ai32[0]; + dst.c1 = m_v.FOG.ai32[1]; - if(PRIM->TME) + if(tme) { - if(PRIM->FST) + if(fst) { - GSVector4::storel(&v.t, m_v.GetUV()); + GSVector4::storel(&dst.t, m_v.GetUV()); } else { - v.t.x = m_v.ST.S; - v.t.y = m_v.ST.T; - v.p.w = m_v.RGBAQ.Q; + dst.t.x = m_v.ST.S; + dst.t.y = m_v.ST.T; + dst.p.w = m_v.RGBAQ.Q; } } - - __super::VertexKick(skip); } -int GSRendererHW9::ScissorTest(const GSVector4& p0, const GSVector4& p1) +template +void GSRendererHW9::AddPrim(Vertex* v, DWORD& count) { GSVector4 scissor = m_context->scissor.dx9; - GSVector4 v0 = p0 < scissor; - GSVector4 v1 = p1 > scissor.zwxy(); + GSVector4 pmin, pmax; - return (v0 | v1).mask() & 3; -} - -void GSRendererHW9::DrawingKickPoint(Vertex* v, int& count) -{ - GSVector4 p0 = v[0].p; - GSVector4 p1 = v[0].p; - - if(ScissorTest(p0, p1)) + switch(primclass) { - count = 0; - return; + case GS_POINT_CLASS: + pmin = v[0].p; + pmax = v[0].p; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + pmin = v[0].p.minv(v[1].p); + pmax = v[0].p.maxv(v[1].p); + break; + case GS_TRIANGLE_CLASS: + pmin = v[0].p.minv(v[1].p).minv(v[2].p); + pmax = v[0].p.maxv(v[1].p).maxv(v[2].p); + break; } -} -void GSRendererHW9::DrawingKickLine(Vertex* v, int& count) -{ - GSVector4 p0 = v[0].p.maxv(v[1].p); - GSVector4 p1 = v[0].p.minv(v[1].p); + GSVector4 test = (pmax < scissor) | (pmin > scissor.zwxy()); - if(ScissorTest(p0, p1)) + if(test.mask() & 3) { count = 0; return; } - if(PRIM->IIP == 0) + switch(primclass) { - v[0].c0 = v[1].c0; + case GS_POINT_CLASS: + break; + case GS_LINE_CLASS: + if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;} + break; + case GS_SPRITE_CLASS: + if(PRIM->IIP == 0) {v[0].c0 = v[1].c0;} + v[0].p.z = v[1].p.z; + v[0].p.w = v[1].p.w; v[0].c1 = v[1].c1; + v[2] = v[1]; + v[3] = v[1]; + v[1].p.y = v[0].p.y; + v[1].t.y = v[0].t.y; + v[2].p.x = v[0].p.x; + v[2].t.x = v[0].t.x; + v[4] = v[1]; + v[5] = v[2]; + count += 4; + break; + case GS_TRIANGLE_CLASS: + if(PRIM->IIP == 0) {v[0].c0 = v[1].c0 = v[2].c0;} + break; } } -void GSRendererHW9::DrawingKickTriangle(Vertex* v, int& count) -{ - GSVector4 p0 = v[0].p.maxv(v[1].p).maxv(v[2].p); - GSVector4 p1 = v[0].p.minv(v[1].p).minv(v[2].p); - - if(ScissorTest(p0, p1)) - { - count = 0; - return; - } - - if(PRIM->IIP == 0) - { - v[0].c0 = v[2].c0; - v[0].c1 = v[2].c1; - } -} - -void GSRendererHW9::DrawingKickSprite(Vertex* v, int& count) -{ - GSVector4 p0 = v[0].p.maxv(v[1].p); - GSVector4 p1 = v[0].p.minv(v[1].p); - - if(ScissorTest(p0, p1)) - { - count = 0; - return; - } - - if(PRIM->IIP == 0) - { - v[0].c0 = v[1].c0; - } - - v[0].p.z = v[1].p.z; - v[0].p.w = v[1].p.w; - v[0].c1 = v[1].c1; - v[2] = v[1]; - v[3] = v[1]; - v[1].p.y = v[0].p.y; - v[1].t.y = v[0].t.y; - v[2].p.x = v[0].p.x; - v[2].t.x = v[0].t.x; - v[4] = v[1]; - v[5] = v[2]; - - count += 4; -} - void GSRendererHW9::Draw(int prim, Texture& rt, Texture& ds, GSTextureCache::GSTexture* tex) { GSDrawingEnvironment& env = m_env; diff --git a/gsdx/GSRendererHW9.h b/gsdx/GSRendererHW9.h index c976c07..382e39c 100644 --- a/gsdx/GSRendererHW9.h +++ b/gsdx/GSRendererHW9.h @@ -36,16 +36,14 @@ protected: GSTextureFX9 m_tfx; bool m_logz; - void VertexKick(bool skip); + template + void AddVertex(); + + template + void AddPrim(Vertex* v, DWORD& count); + bool WrapZ(float maxz); - __forceinline int ScissorTest(const GSVector4& p0, const GSVector4& p1); - - void DrawingKickPoint(Vertex* v, int& count); - void DrawingKickLine(Vertex* v, int& count); - void DrawingKickTriangle(Vertex* v, int& count); - void DrawingKickSprite(Vertex* v, int& count); - void Draw(int prim, Texture& rt, Texture& ds, GSTextureCache::GSTexture* tex); struct diff --git a/gsdx/GSRendererNull.h b/gsdx/GSRendererNull.h index be83a60..0c2d3a6 100644 --- a/gsdx/GSRendererNull.h +++ b/gsdx/GSRendererNull.h @@ -27,14 +27,11 @@ template class GSRendererNull : public GSRendererT { protected: - void VertexKick(bool skip) + void AddVertex() { - m_vl.AddTail(); - - __super::VertexKick(skip); } - void DrawingKick(GSVertexNull* v, int& count) + void AddPrim(GSVertexNull* v, DWORD& count) { m_perfmon.Put(GSPerfMon::Prim, 1); } @@ -52,9 +49,14 @@ public: GSRendererNull(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) : GSRendererT(base, mt, irq, nloophack, rs) { - for(int i = 0; i < countof(m_fpDrawingKickHandlers); i++) + m_fpAddVertexHandlers[0][0] = (AddVertexHandler)&GSRendererNull::AddVertex; + m_fpAddVertexHandlers[0][1] = (AddVertexHandler)&GSRendererNull::AddVertex; + m_fpAddVertexHandlers[1][0] = (AddVertexHandler)&GSRendererNull::AddVertex; + m_fpAddVertexHandlers[1][1] = (AddVertexHandler)&GSRendererNull::AddVertex; + + for(int i = 0; i < countof(m_fpAddPrimHandlers); i++) { - m_fpDrawingKickHandlers[i] = (DrawingKickHandler)&GSRendererNull::DrawingKick; + m_fpAddPrimHandlers[i] = (AddPrimHandler)&GSRendererNull::AddPrim; } } }; \ No newline at end of file diff --git a/gsdx/GSRendererSW.h b/gsdx/GSRendererSW.h index 3beeace..514baea 100644 --- a/gsdx/GSRendererSW.h +++ b/gsdx/GSRendererSW.h @@ -117,7 +117,7 @@ protected: return true; } - void VertexKick(bool skip) + template void AddVertex() { GSVertexSW v; @@ -125,15 +125,14 @@ protected: GSVector4i o((int)m_context->XYOFFSET.OFX, (int)m_context->XYOFFSET.OFY); v.p = GSVector4(p - o) * g_pos_scale; - v.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the DWORD=>float=>DWORD conversion - v.c = GSVector4((DWORD)m_v.RGBAQ.ai32[0]) * 128.0f; + v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.ai32[0]).u8to32() << 7); - if(PRIM->TME) + if(tme) { float q; - if(PRIM->FST) + if(fst) { v.t = GSVector4(GSVector4i(m_v.UV.U, m_v.UV.V) << (16 - 4)); q = 1.0f; @@ -148,13 +147,14 @@ protected: v.t = v.t.xyxy(GSVector4::load(q)); } - m_vl.AddTail() = v; + GSVertexSW& dst = m_vl.AddTail(); - __super::VertexKick(skip); + dst = v; + + dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the DWORD => float => DWORD conversion } - template - void DrawingKick(GSVertexSW* v, int& count) + template void AddPrim(GSVertexSW* v, DWORD& count) { GSVector4 pmin, pmax; @@ -213,10 +213,10 @@ protected: break; case GS_TRIANGLE_CLASS: if(PRIM->IIP == 0) {v[0].c = v[2].c; v[1].c = v[2].c;} - m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t).minv(v[2].t); - m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t).maxv(v[2].t); - m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c).minv(v[1].c).minv(v[2].c); - m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c).maxv(v[1].c).maxv(v[2].c); + m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t.minv(v[2].t)); + m_vtrace.max.t = m_vtrace.max.t.maxv(v[0].t).maxv(v[1].t.maxv(v[2].t)); + m_vtrace.min.c = m_vtrace.min.c.minv(v[0].c).minv(v[1].c.minv(v[2].c)); + m_vtrace.max.c = m_vtrace.max.c.maxv(v[0].c).maxv(v[1].c.maxv(v[2].c)); break; case GS_SPRITE_CLASS: m_vtrace.min.t = m_vtrace.min.t.minv(v[0].t).minv(v[1].t); @@ -457,40 +457,92 @@ protected: { p.sel.fst = 1; - GSVector4 w = v[0].t.zzzz().rcpnr(); - - for(int i = 0, j = m_count; i < j; i++) + if(v[0].t.z != 1.0f) { - v[i].t *= w; + GSVector4 w = v[0].t.zzzz().rcpnr(); + + for(int i = 0, j = m_count; i < j; i++) + { + v[i].t *= w; + } + + m_vtrace.min.t *= w; + m_vtrace.max.t *= w; } } else if(prim == GS_SPRITE) { p.sel.fst = 1; + GSVector4 tmin = GSVector4(FLT_MAX); + GSVector4 tmax = GSVector4(-FLT_MAX); + for(int i = 0, j = m_count; i < j; i += 2) { GSVector4 w = v[i + 1].t.zzzz().rcpnr(); - v[i + 0].t *= w; - v[i + 1].t *= w; + GSVector4 v0 = v[i + 0].t * w; + GSVector4 v1 = v[i + 1].t * w; + + v[i + 0].t = v0; + v[i + 1].t = v1; + + tmin = tmin.minv(v0).minv(v1); + tmax = tmax.maxv(v0).maxv(v1); } + + m_vtrace.max.t = tmax; + m_vtrace.min.t = tmin; } } - if(p.sel.fst && p.sel.ltf) + if(p.sel.fst) { // if q is constant we can do the half pel shift for bilinear sampling on the vertices - GSVertexSW* v = m_vertices; + if(p.sel.ltf) + { + GSVector4 half(0x8000, 0x8000); - GSVector4 half(0x8000, 0x8000); + GSVertexSW* v = m_vertices; + + for(int i = 0, j = m_count; i < j; i++) + { + v[i].t -= half; + } + + m_vtrace.min.t -= half; + m_vtrace.max.t += half; + } + } + /* + else + { + GSVector4 tmin = GSVector4(FLT_MAX); + GSVector4 tmax = GSVector4(-FLT_MAX); + + GSVertexSW* v = m_vertices; for(int i = 0, j = m_count; i < j; i++) { - v[i].t -= half; + GSVector4 v0 = v[i].t * v[i].t.zzzz().rcpnr(); + + tmin = tmin.minv(v0); + tmax = tmax.maxv(v0); } + + if(p.sel.ltf) + { + GSVector4 half(0x8000, 0x8000); + + tmin -= half; + tmax += half; + } + + m_vtrace.min.t = tmin; + m_vtrace.max.t = tmax; } + */ CRect r; @@ -522,15 +574,11 @@ protected: p.sel.abe = context->ALPHA.ai32[0]; p.sel.pabe = env.PABE.PABE; } - else - { - // printf("opaque\n"); - } + } - if(PRIM->AA1) - { - // TODO: automatic alpha blending (ABE=1, A=0 B=1 C=0 D=1) - } + if(PRIM->AA1) + { + // TODO: automatic alpha blending (ABE=1, A=0 B=1 C=0 D=1) } if(p.sel.date @@ -642,7 +690,7 @@ protected: m_tc->InvalidateVideoMem(BITBLTBUF, r); } - void MinMaxUV(int w, int h, CRect& r, bool fst) + void MinMaxUV(int w, int h, CRect& r, DWORD fst) { const GSDrawingContext* context = m_context; @@ -694,16 +742,34 @@ protected: if(fst) { - GSVector4i v = GSVector4i(m_vtrace.min.t.xyxy(m_vtrace.max.t) / (m_vtrace.min.t.zzzz() * 0x10000)); + GSVector4i uv = GSVector4i(m_vtrace.min.t.xyxy(m_vtrace.max.t)).sra32(16); +/* + int tw = context->TEX0.TW; + int th = context->TEX0.TH; + GSVector4i u = uv & GSVector4i::xffffffff().srl32(32 - tw); + GSVector4i v = uv & GSVector4i::xffffffff().srl32(32 - th); + + GSVector4i uu = uv.sra32(tw); + GSVector4i vv = uv.sra32(th); + + int mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); +*/ switch(wms) { - case CLAMP_REPEAT: // TODO + case CLAMP_REPEAT: +/* + if(mask & 0x000f) + { + if(vr.x < u.x) vr.x = u.x; + if(vr.z > u.z + 1) vr.z = u.z + 1; + } +*/ break; case CLAMP_CLAMP: case CLAMP_REGION_CLAMP: - if(vr.x < v.x) vr.x = v.x; - if(vr.z > v.z + 1) vr.z = v.z + 1; + if(vr.x < uv.x) vr.x = uv.x; + if(vr.z > uv.z + 1) vr.z = uv.z + 1; break; case CLAMP_REGION_REPEAT: // TODO break; @@ -713,12 +779,19 @@ protected: switch(wmt) { - case CLAMP_REPEAT: // TODO + case CLAMP_REPEAT: +/* + if(mask & 0xf000) + { + if(vr.y < v.y) vr.y = v.y; + if(vr.w > v.w + 1) vr.w = v.w + 1; + } +*/ break; case CLAMP_CLAMP: case CLAMP_REGION_CLAMP: - if(vr.y < v.y) vr.y = v.y; - if(vr.w > v.w + 1) vr.w = v.w + 1; + if(vr.y < uv.y) vr.y = uv.y; + if(vr.w > uv.w + 1) vr.w = uv.w + 1; break; case CLAMP_REGION_REPEAT: // TODO break; @@ -740,13 +813,18 @@ public: m_tc = new GSTextureCacheSW(this); - m_fpDrawingKickHandlers[GS_POINTLIST] = (DrawingKickHandler)&GSRendererSW::DrawingKick; - m_fpDrawingKickHandlers[GS_LINELIST] = (DrawingKickHandler)&GSRendererSW::DrawingKick; - m_fpDrawingKickHandlers[GS_LINESTRIP] = (DrawingKickHandler)&GSRendererSW::DrawingKick; - m_fpDrawingKickHandlers[GS_TRIANGLELIST] = (DrawingKickHandler)&GSRendererSW::DrawingKick; - m_fpDrawingKickHandlers[GS_TRIANGLESTRIP] = (DrawingKickHandler)&GSRendererSW::DrawingKick; - m_fpDrawingKickHandlers[GS_TRIANGLEFAN] = (DrawingKickHandler)&GSRendererSW::DrawingKick; - m_fpDrawingKickHandlers[GS_SPRITE] = (DrawingKickHandler)&GSRendererSW::DrawingKick; + m_fpAddVertexHandlers[0][0] = (AddVertexHandler)&GSRendererSW::AddVertex<0, 0>; + m_fpAddVertexHandlers[0][1] = (AddVertexHandler)&GSRendererSW::AddVertex<0, 0>; + m_fpAddVertexHandlers[1][0] = (AddVertexHandler)&GSRendererSW::AddVertex<1, 0>; + m_fpAddVertexHandlers[1][1] = (AddVertexHandler)&GSRendererSW::AddVertex<1, 1>; + + m_fpAddPrimHandlers[GS_POINTLIST] = (AddPrimHandler)&GSRendererSW::AddPrim; + m_fpAddPrimHandlers[GS_LINELIST] = (AddPrimHandler)&GSRendererSW::AddPrim; + m_fpAddPrimHandlers[GS_LINESTRIP] = (AddPrimHandler)&GSRendererSW::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLELIST] = (AddPrimHandler)&GSRendererSW::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLESTRIP] = (AddPrimHandler)&GSRendererSW::AddPrim; + m_fpAddPrimHandlers[GS_TRIANGLEFAN] = (AddPrimHandler)&GSRendererSW::AddPrim; + m_fpAddPrimHandlers[GS_SPRITE] = (AddPrimHandler)&GSRendererSW::AddPrim; } virtual ~GSRendererSW() diff --git a/gsdx/GSState.cpp b/gsdx/GSState.cpp index 6d94f5a..09a75ab 100644 --- a/gsdx/GSState.cpp +++ b/gsdx/GSState.cpp @@ -137,8 +137,6 @@ void GSState::Reset() m_context = &m_env.CTXT[0]; - m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM); - InvalidateTextureCache(); } @@ -518,8 +516,6 @@ void GSState::GIFRegHandlerPRIM(GIFReg* r) m_context = &m_env.CTXT[PRIM->CTXT]; - m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM); - ResetPrim(); } @@ -677,27 +673,6 @@ void GSState::GIFRegHandlerPRMODECONT(GIFReg* r) if(PRIM->PRIM == 7) TRACE(_T("Invalid PRMODECONT/PRIM\n")); m_context = &m_env.CTXT[PRIM->CTXT]; - - m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM); -/* - if(m_env.PRMODECONT.AC != r->PRMODECONT.AC) - { - if(m_env.PRIM.ai32[0] != m_env.PRMODE.ai32[0]) - { - Flush(); - } - - m_env.PRMODECONT.AC = r->PRMODECONT.AC; - - PRIM = m_env.PRMODECONT.AC ? &m_env.PRIM : (GIFRegPRIM*)&m_env.PRMODE; - - if(PRIM->PRIM == 7) TRACE(_T("Invalid PRMODECONT/PRIM\n")); - - m_context = &m_env.CTXT[PRIM->CTXT]; - - m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM); - } -*/ } void GSState::GIFRegHandlerPRMODE(GIFReg* r) @@ -1584,8 +1559,6 @@ int GSState::Defrost(const GSFreezeData* fd) m_context = &m_env.CTXT[PRIM->CTXT]; - m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM); - m_env.CTXT[0].UpdateScissor(); m_env.CTXT[1].UpdateScissor(); diff --git a/gsdx/GSState.h b/gsdx/GSState.h index f9377dd..e2e0d35 100644 --- a/gsdx/GSState.h +++ b/gsdx/GSState.h @@ -125,6 +125,15 @@ class GSState : public GSAlignedClass<16> protected: bool IsBadFrame(int& skip); + typedef void (GSState::*VertexKickHandler)(bool skip); + + VertexKickHandler m_fpVertexKickHandlers[8]; + + void VertexKick(bool skip) + { + (this->*m_fpVertexKickHandlers[PRIM->PRIM])(skip); + } + public: GIFRegPRIM* PRIM; GSRegPMODE* PMODE; @@ -184,7 +193,6 @@ public: virtual void Flush(); virtual void FlushPrim() = 0; virtual void ResetPrim() = 0; - virtual void VertexKick(bool skip) = 0; virtual void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) {} virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) {} virtual void InvalidateTextureCache() {} diff --git a/gsdx/GSUtil.cpp b/gsdx/GSUtil.cpp index dacd4ab..bba238a 100644 --- a/gsdx/GSUtil.cpp +++ b/gsdx/GSUtil.cpp @@ -27,7 +27,6 @@ static struct GSUtilMaps { BYTE PrimClassField[8]; - BYTE PrimVertexCount[8]; bool CompatibleBitsField[64][64]; bool SharedBitsField[64][64]; @@ -42,15 +41,6 @@ static struct GSUtilMaps PrimClassField[GS_SPRITE] = GS_SPRITE_CLASS; PrimClassField[GS_INVALID] = GS_INVALID_CLASS; - PrimVertexCount[GS_POINTLIST] = 1; - PrimVertexCount[GS_LINELIST] = 2; - PrimVertexCount[GS_LINESTRIP] = 2; - PrimVertexCount[GS_TRIANGLELIST] = 3; - PrimVertexCount[GS_TRIANGLESTRIP] = 3; - PrimVertexCount[GS_TRIANGLEFAN] = 3; - PrimVertexCount[GS_SPRITE] = 2; - PrimVertexCount[GS_INVALID] = 1; - memset(CompatibleBitsField, 0, sizeof(CompatibleBitsField)); CompatibleBitsField[PSM_PSMCT32][PSM_PSMCT24] = true; @@ -87,11 +77,6 @@ GS_PRIM_CLASS GSUtil::GetPrimClass(DWORD prim) return (GS_PRIM_CLASS)s_maps.PrimClassField[prim]; } -DWORD GSUtil::GetPrimVertexCount(DWORD prim) -{ - return s_maps.PrimVertexCount[prim]; -} - bool GSUtil::HasSharedBits(DWORD spsm, DWORD dpsm) { return s_maps.SharedBitsField[spsm][dpsm]; diff --git a/gsdx/GSUtil.h b/gsdx/GSUtil.h index 4c3c495..973111f 100644 --- a/gsdx/GSUtil.h +++ b/gsdx/GSUtil.h @@ -27,7 +27,6 @@ class GSUtil { public: static GS_PRIM_CLASS GetPrimClass(DWORD prim); - static DWORD GetPrimVertexCount(DWORD prim); static bool HasSharedBits(DWORD spsm, DWORD dpsm); static bool HasSharedBits(DWORD sbp, DWORD spsm, DWORD dbp, DWORD dpsm); diff --git a/gsdx/GSVector.h b/gsdx/GSVector.h index 4111418..9d7bcfc 100644 --- a/gsdx/GSVector.h +++ b/gsdx/GSVector.h @@ -452,7 +452,15 @@ public: GSVector4i upl8() const { + #if 0 // _M_SSE >= 0x401 // TODO: compiler bug + + return GSVector4i(_mm_cvtepu8_epi16(m)); + + #else + return GSVector4i(_mm_unpacklo_epi8(m, _mm_setzero_si128())); + + #endif } GSVector4i uph8() const @@ -462,7 +470,15 @@ public: GSVector4i upl16() const { + #if 0 //_M_SSE >= 0x401 // TODO: compiler bug + + return GSVector4i(_mm_cvtepu16_epi32(m)); + + #else + return GSVector4i(_mm_unpacklo_epi16(m, _mm_setzero_si128())); + + #endif } GSVector4i uph16() const @@ -472,7 +488,15 @@ public: GSVector4i upl32() const { + #if 0 //_M_SSE >= 0x401 // TODO: compiler bug + + return GSVector4i(_mm_cvtepu32_epi64(m)); + + #else + return GSVector4i(_mm_unpacklo_epi32(m, _mm_setzero_si128())); + + #endif } GSVector4i uph32() const @@ -492,6 +516,11 @@ public: #if _M_SSE >= 0x401 + // WARNING!!! + // + // MSVC (2008, 2010 ctp) believes that there is a "mem, reg" form of the pmovz/sx* instructions, + // turning these intrinsics into a minefield, don't spill regs when using them... + GSVector4i i8to16() const { return GSVector4i(_mm_cvtepi8_epi16(m)); diff --git a/gsdx/GSVertex.h b/gsdx/GSVertex.h index 06fdf74..c1b52ac 100644 --- a/gsdx/GSVertex.h +++ b/gsdx/GSVertex.h @@ -48,7 +48,7 @@ __declspec(align(16)) struct GSVertex GSVertex() {memset(this, 0, sizeof(*this));} - GSVector4 GetUV() {return GSVector4(GSVector4i::load(UV.ai32[0]).upl16());} + GSVector4 GetUV() const {return GSVector4(GSVector4i::load(UV.ai32[0]).upl16());} }; struct GSVertexOld diff --git a/gsdx/res/tfx10.fx b/gsdx/res/tfx10.fx index 80bf578..7be092d 100644 --- a/gsdx/res/tfx10.fx +++ b/gsdx/res/tfx10.fx @@ -103,7 +103,6 @@ void gs_main(point VS_OUTPUT input[1], inout PointStream stream) void gs_main(line VS_OUTPUT input[2], inout LineStream stream) { #if IIP == 0 - input[0].t.z = input[1].t.z; input[0].c = input[1].c; #endif @@ -117,9 +116,7 @@ void gs_main(line VS_OUTPUT input[2], inout LineStream stream) void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream stream) { #if IIP == 0 - input[0].t.z = input[2].t.z; input[0].c = input[2].c; - input[1].t.z = input[2].t.z; input[1].c = input[2].c; #endif