This commit is contained in:
gabest
2008-12-11 03:10:31 +00:00
parent a79db9d22b
commit 476850c461
4 changed files with 164 additions and 91 deletions

View File

@@ -166,8 +166,8 @@ void GPUDrawScanline::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin
GSVector4i u1 = u0.add16(GSVector4i::x0001());
GSVector4i v1 = v0.add16(GSVector4i::x0001());
GSVector4i uf = u & GSVector4i::x00ff();
GSVector4i vf = v & GSVector4i::x00ff();
GSVector4i uf = (u & GSVector4i::x00ff()) << 7;
GSVector4i vf = (v & GSVector4i::x00ff()) << 7;
if(twin)
{
@@ -247,41 +247,41 @@ void GPUDrawScanline::SampleTexture(int pixels, DWORD ltf, DWORD tlu, DWORD twin
#endif
GSVector4i r00 = (c00 & 0x001f001f) << 2;
GSVector4i r01 = (c01 & 0x001f001f) << 2;
GSVector4i r10 = (c10 & 0x001f001f) << 2;
GSVector4i r11 = (c11 & 0x001f001f) << 2;
GSVector4i r00 = (c00 & 0x001f001f) << 3;
GSVector4i r01 = (c01 & 0x001f001f) << 3;
GSVector4i r10 = (c10 & 0x001f001f) << 3;
GSVector4i r11 = (c11 & 0x001f001f) << 3;
r00 = r00.add16(r01.sub16(r00).mul16l(uf).sra16(8));
r10 = r10.add16(r11.sub16(r10).mul16l(uf).sra16(8));
c[0] = r00.add16(r10.sub16(r00).mul16l(vf).sra16(8)) << 1;
r00 = r00.lerp16<0>(r01, uf);
r10 = r10.lerp16<0>(r11, uf);
c[0] = r00.lerp16<0>(r10, vf);
GSVector4i g00 = (c00 & 0x03e003e0) >> 3;
GSVector4i g01 = (c01 & 0x03e003e0) >> 3;
GSVector4i g10 = (c10 & 0x03e003e0) >> 3;
GSVector4i g11 = (c11 & 0x03e003e0) >> 3;
GSVector4i g00 = (c00 & 0x03e003e0) >> 2;
GSVector4i g01 = (c01 & 0x03e003e0) >> 2;
GSVector4i g10 = (c10 & 0x03e003e0) >> 2;
GSVector4i g11 = (c11 & 0x03e003e0) >> 2;
g00 = g00.add16(g01.sub16(g00).mul16l(uf).sra16(8));
g10 = g10.add16(g11.sub16(g10).mul16l(uf).sra16(8));
c[1] = g00.add16(g10.sub16(g00).mul16l(vf).sra16(8)) << 1;
g00 = g00.lerp16<0>(g01, uf);
g10 = g10.lerp16<0>(g11, uf);
c[1] = g00.lerp16<0>(g10, vf);
GSVector4i b00 = (c00 & 0x7c007c00) >> 8;
GSVector4i b01 = (c01 & 0x7c007c00) >> 8;
GSVector4i b10 = (c10 & 0x7c007c00) >> 8;
GSVector4i b11 = (c11 & 0x7c007c00) >> 8;
GSVector4i b00 = (c00 & 0x7c007c00) >> 7;
GSVector4i b01 = (c01 & 0x7c007c00) >> 7;
GSVector4i b10 = (c10 & 0x7c007c00) >> 7;
GSVector4i b11 = (c11 & 0x7c007c00) >> 7;
b00 = b00.add16(b01.sub16(b00).mul16l(uf).sra16(8));
b10 = b10.add16(b11.sub16(b10).mul16l(uf).sra16(8));
c[2] = b00.add16(b10.sub16(b00).mul16l(vf).sra16(8)) << 1;
b00 = b00.lerp16<0>(b01, uf);
b10 = b10.lerp16<0>(b11, uf);
c[2] = b00.lerp16<0>(b10, vf);
GSVector4i a00 = (c00 & 0x80008000) >> 9;
GSVector4i a01 = (c01 & 0x80008000) >> 9;
GSVector4i a10 = (c10 & 0x80008000) >> 9;
GSVector4i a11 = (c11 & 0x80008000) >> 9;
GSVector4i a00 = (c00 & 0x80008000) >> 8;
GSVector4i a01 = (c01 & 0x80008000) >> 8;
GSVector4i a10 = (c10 & 0x80008000) >> 8;
GSVector4i a11 = (c11 & 0x80008000) >> 8;
a00 = a00.add16(a01.sub16(a00).mul16l(uf).sra16(8));
a10 = a10.add16(a11.sub16(a10).mul16l(uf).sra16(8));
c[3] = a00.add16(a10.sub16(a00).mul16l(vf).sra16(8)).gt16(GSVector4i::zero());
a00 = a00.lerp16<0>(a01, uf);
a10 = a10.lerp16<0>(a11, uf);
c[3] = a00.lerp16<0>(a10, vf).gt16(GSVector4i::zero());
// mask out blank pixels (not perfect)
@@ -375,9 +375,9 @@ void GPUDrawScanline::ColorTFX(DWORD tfx, const GSVector4i& r, const GSVector4i&
c[2] = b.srl16(7);
break;
case 2: // modulate (tfx = tme | tge)
c[0] = c[0].sll16(2).mul16hu(r).clamp8();
c[1] = c[1].sll16(2).mul16hu(g).clamp8();
c[2] = c[2].sll16(2).mul16hu(b).clamp8();
c[0] = c[0].modulate16<1>(r).clamp8();
c[1] = c[1].modulate16<1>(g).clamp8();
c[2] = c[2].modulate16<1>(b).clamp8();
break;
case 3: // decal (tfx = tme)
break;

View File

@@ -409,7 +409,7 @@ void GSDrawScanline::SampleTexture(int pixels, DWORD ztst, DWORD ltf, DWORD tlu,
GSVector4i ui = GSVector4i(u);
GSVector4i vi = GSVector4i(v);
GSVector4i uv = (ui.sra32(12)).ps32(vi.sra32(12));
GSVector4i uv = ui.sra32(15).ps32(vi.sra32(15));
GSVector4i uv0, uv1;
GSVector4i addr00, addr01, addr10, addr11;
@@ -417,17 +417,24 @@ void GSDrawScanline::SampleTexture(int pixels, DWORD ztst, DWORD ltf, DWORD tlu,
if(ltf)
{
GSVector4i uvf = (ui & GSVector4i::x00000fff()).ps32(vi & GSVector4i::x00000fff());
GSVector4i mask = GSVector4i::x00007fff();
GSVector4i uvf = (ui & mask).ps32(vi & mask);
GSVector4i uf = uvf.upl16(uvf);
GSVector4i vf = uvf.uph16(uvf);
uv0 = Wrap(uv);
uv1 = Wrap(uv.add16(GSVector4i::x0001()));
addr00 = (uv0.uph16() << tw) + uv0.upl16();
addr01 = (uv0.uph16() << tw) + uv1.upl16();
addr10 = (uv1.uph16() << tw) + uv0.upl16();
addr11 = (uv1.uph16() << tw) + uv1.upl16();
GSVector4i y0 = uv0.uph16() << tw;
GSVector4i y1 = uv1.uph16() << tw;
GSVector4i x0 = uv0.upl16();
GSVector4i x1 = uv1.upl16();
addr00 = y0 + x0;
addr01 = y0 + x1;
addr10 = y1 + x0;
addr11 = y1 + x1;
#if _M_SSE >= 0x401
@@ -485,23 +492,25 @@ void GSDrawScanline::SampleTexture(int pixels, DWORD ztst, DWORD ltf, DWORD tlu,
#endif
GSVector4i rb00 = c00 & GSVector4i::x00ff();
GSVector4i rb01 = c01 & GSVector4i::x00ff();
GSVector4i rb10 = c10 & GSVector4i::x00ff();
GSVector4i rb11 = c11 & GSVector4i::x00ff();
mask = GSVector4i::x00ff();
GSVector4i ga00 = (c00 >> 8) & GSVector4i::x00ff();
GSVector4i ga01 = (c01 >> 8) & GSVector4i::x00ff();
GSVector4i ga10 = (c10 >> 8) & GSVector4i::x00ff();
GSVector4i ga11 = (c11 >> 8) & GSVector4i::x00ff();
GSVector4i rb00 = c00 & mask;
GSVector4i rb01 = c01 & mask;
GSVector4i rb10 = c10 & mask;
GSVector4i rb11 = c11 & mask;
rb00 = rb00.add16(rb01.sub16(rb00).sll16(4).mul16hs(uf));
rb10 = rb10.add16(rb11.sub16(rb10).sll16(4).mul16hs(uf));
rb00 = rb00.add16(rb10.sub16(rb00).sll16(4).mul16hs(vf));
GSVector4i ga00 = (c00 >> 8) & mask;
GSVector4i ga01 = (c01 >> 8) & mask;
GSVector4i ga10 = (c10 >> 8) & mask;
GSVector4i ga11 = (c11 >> 8) & mask;
ga00 = ga00.add16(ga01.sub16(ga00).sll16(4).mul16hs(uf));
ga10 = ga10.add16(ga11.sub16(ga10).sll16(4).mul16hs(uf));
ga00 = ga00.add16(ga10.sub16(ga00).sll16(4).mul16hs(vf));
rb00 = rb00.lerp16<0>(rb01, uf);
rb10 = rb10.lerp16<0>(rb11, uf);
rb00 = rb00.lerp16<0>(rb10, vf);
ga00 = ga00.lerp16<0>(ga01, uf);
ga10 = ga10.lerp16<0>(ga11, uf);
ga00 = ga00.lerp16<0>(ga10, vf);
c[0] = rb00;
c[1] = ga00;
@@ -556,8 +565,10 @@ void GSDrawScanline::SampleTexture(int pixels, DWORD ztst, DWORD ltf, DWORD tlu,
#endif
c[0] = c00 & GSVector4i::x00ff();
c[1] = (c00 >> 8) & GSVector4i::x00ff();
GSVector4i mask = GSVector4i::x00ff();
c[0] = c00 & mask;
c[1] = (c00 >> 8) & mask;
}
}
@@ -568,15 +579,15 @@ void GSDrawScanline::ColorTFX(DWORD tfx, const GSVector4i& rbf, const GSVector4i
switch(tfx)
{
case TFX_MODULATE:
rbt = rbt.sll16(2).mul16hu(rbf).clamp8();
rbt = rbt.modulate16<1>(rbf).clamp8();
break;
case TFX_DECAL:
break;
case TFX_HIGHLIGHT:
case TFX_HIGHLIGHT2:
af = gaf.srl16(7).yywwl().yywwh();
rbt = rbt.sll16(2).mul16hu(rbf).add16(af).clamp8();
gat = gat.sll16(2).mul16hu(gaf).add16(af).clamp8().mix16(gat);
af = gaf.yywwl().yywwh().srl16(7);
rbt = rbt.modulate16<1>(rbf).add16(af).clamp8();
gat = gat.modulate16<1>(rbf).add16(af).clamp8().mix16(gat);
break;
case TFX_NONE:
rbt = rbf.srl16(7);
@@ -591,7 +602,7 @@ void GSDrawScanline::AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVec
switch(tfx)
{
case TFX_MODULATE:
gat = gat.sll16(2).mul16hu(gaf).clamp8();
gat = gat.modulate16<1>(gaf).clamp8();
if(!tcc) gat = gat.mix16(gaf.srl16(7));
break;
case TFX_DECAL:
@@ -613,13 +624,8 @@ void GSDrawScanline::AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4i& gaf, GSVec
void GSDrawScanline::Fog(const GSVector4i& f, GSVector4i& rb, GSVector4i& ga)
{
GSVector4i frb = m_slenv.frb;
GSVector4i fga = m_slenv.fga;
GSVector4i fog = f.srl16(3);
rb = frb.add16(rb.sub16(frb).sll16(4).mul16hs(fog));
ga = fga.add16(ga.sub16(fga).sll16(4).mul16hs(fog)).mix16(ga);
rb = m_slenv.frb.lerp16<0>(rb, f);
ga = m_slenv.fga.lerp16<0>(ga, f).mix16(ga);
}
bool GSDrawScanline::TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& za, GSVector4i& test)
@@ -2339,8 +2345,8 @@ void GSDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex& v
if(m_sel.ltf)
{
u -= 2048.0f;
v -= 2048.0f;
u -= 0x4000;
v -= 0x4000;
}
}
@@ -2388,8 +2394,10 @@ void GSDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex& v
if(m_sel.abe != 255)
{
c[2] = d & GSVector4i::x00ff();
c[3] = (d >> 8) & GSVector4i::x00ff();
GSVector4i mask = GSVector4i::x00ff();
c[2] = d & mask;
c[3] = (d >> 8) & mask;
if(fpsm == 1)
{
@@ -2404,13 +2412,10 @@ void GSDrawScanline::DrawScanlineT(int top, int left, int right, const Vertex& v
DWORD abec = m_sel.abec;
DWORD abed = m_sel.abed;
GSVector4i a = c[abec * 2 + 1].yywwl().yywwh().sll16(5);
GSVector4i a = c[abec * 2 + 1].yywwl().yywwh().sll16(7);
GSVector4i drb = c[abea * 2 + 0].sub16(c[abeb * 2 + 0]);
GSVector4i dga = c[abea * 2 + 1].sub16(c[abeb * 2 + 1]);
GSVector4i rb = drb.sll16(4).mul16hs(a).add16(c[abed * 2 + 0]);
GSVector4i ga = dga.sll16(4).mul16hs(a).add16(c[abed * 2 + 1]);
GSVector4i rb = GSVector4i::lerp16<1>(c[abea * 2 + 0], c[abeb * 2 + 0], a, c[abed * 2 + 0]);
GSVector4i ga = GSVector4i::lerp16<1>(c[abea * 2 + 1], c[abeb * 2 + 1], a, c[abed * 2 + 1]);
if(m_sel.pabe)
{
@@ -2590,8 +2595,8 @@ void GSDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex&
if(ltf)
{
u -= 2048.0f;
v -= 2048.0f;
u -= 0x4000;
v -= 0x4000;
}
}
@@ -2639,8 +2644,10 @@ void GSDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex&
if(abe != 255)
{
c[2] = d & GSVector4i::x00ff();
c[3] = (d >> 8) & GSVector4i::x00ff();
GSVector4i mask = GSVector4i::x00ff();
c[2] = d & mask;
c[3] = (d >> 8) & mask;
if(fpsm == 1)
{
@@ -2650,13 +2657,10 @@ void GSDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex&
c[4] = GSVector4::zero();
c[5] = m_slenv.afix;
GSVector4i a = c[abec * 2 + 1].yywwl().yywwh().sll16(5);
GSVector4i a = c[abec * 2 + 1].yywwl().yywwh().sll16(7);
/*
GSVector4i drb = c[abea * 2 + 0].sub16(c[abeb * 2 + 0]).sll16(4);
GSVector4i dga = c[abea * 2 + 1].sub16(c[abeb * 2 + 1]).sll16(4);
GSVector4i rb = drb.mul16hs(a).add16(c[abed * 2 + 0]);
GSVector4i ga = dga.mul16hs(a).add16(c[abed * 2 + 1]);
GSVector4i rb = GSVector4i::lerp16<1>(c[abea * 2 + 0], c[abeb * 2 + 0], a, c[abed * 2 + 0]);
GSVector4i ga = GSVector4i::lerp16<1>(c[abea * 2 + 1], c[abeb * 2 + 1], a, c[abed * 2 + 1]);
*/
GSVector4i rb, ga;
@@ -2673,8 +2677,8 @@ void GSDrawScanline::DrawScanlineExT(int top, int left, int right, const Vertex&
if(!(fpsm == 1 && abec == 1))
{
rb = rb.sll16(4).mul16hs(a);
ga = ga.sll16(4).mul16hs(a);
rb = rb.sll16(2).mul16hs(a);
ga = ga.sll16(2).mul16hs(a);
/* TODO

View File

@@ -133,14 +133,14 @@ protected:
{
v.t.x = (float)(int)m_v.UV.U;
v.t.y = (float)(int)m_v.UV.V;
v.t *= 4096.0f / 16;
v.t *= 0x8000 >> 4;
v.t.z = 1.0f;
}
else
{
v.t.x = m_v.ST.S;
v.t.y = m_v.ST.T;
v.t *= GSVector4((float)(4096 << m_context->TEX0.TW), (float)(4096 << m_context->TEX0.TH));
v.t *= GSVector4((float)(0x8000 << m_context->TEX0.TW), (float)(0x8000 << m_context->TEX0.TH));
v.t.z = m_v.RGBAQ.Q;
}
}

View File

@@ -680,6 +680,75 @@ public:
#endif
template<int shift> GSVector4i lerp16(const GSVector4i& a, const GSVector4i& f) const
{
	// Computes *this + (((a - *this) << shift) * f) using the 16-bit lane
	// helpers (sub16, sll16, mul16hrs / mul16hs, add16).
	//
	// The two preprocessor branches pre-scale the difference by a different
	// amount (shift vs. shift + 1) before handing it to mul16hrs and mul16hs
	// respectively.
	// NOTE(review): presumably mul16hrs keeps one more bit of the product than
	// mul16hs, which the extra shift in the fallback compensates for - confirm
	// against the definitions of both helpers.

#if _M_SSE >= 0x301

	GSVector4i delta = a.sub16(*this);

	// shift is a template constant, so this test is decided per instantiation
	if(shift > 0) delta = delta.sll16(shift);

	return add16(delta.mul16hrs(f));

#else

	return add16(a.sub16(*this).sll16(shift + 1).mul16hs(f));

#endif
}
template<int shift> static GSVector4i lerp16(const GSVector4i& a, const GSVector4i& b, const GSVector4i& c, const GSVector4i& d)
{
	// Computes d + (((a - b) << shift) * c) using the 16-bit lane helpers
	// (sub16, sll16, mul16hrs / mul16hs, add16). Unlike the two-argument
	// member overload, the base term (d) is independent of the operands
	// that are subtracted (a, b).
	//
	// NOTE(review): the fallback branch shifts by one extra bit before
	// mul16hs; presumably this matches the scaling of mul16hrs in the first
	// branch - confirm against the definitions of both helpers.

#if _M_SSE >= 0x301

	GSVector4i scaled = a.sub16(b);

	// shift is a template constant, so this test is decided per instantiation
	if(shift > 0) scaled = scaled.sll16(shift);

	return d.add16(scaled.mul16hrs(c));

#else

	return d.add16(a.sub16(b).sll16(shift + 1).mul16hs(c));

#endif
}
template<int shift> GSVector4i modulate16(const GSVector4i& f) const
{
	// Computes (*this << shift) * f using the 16-bit lane helpers
	// (sll16, mul16hrs / mul16hs). *this is never modified; the work is
	// done on a copy.
	//
	// NOTE(review): the fallback branch shifts by one extra bit before
	// mul16hs; presumably this matches the scaling of mul16hrs in the first
	// branch - confirm against the definitions of both helpers.

#if _M_SSE >= 0x301

	GSVector4i scaled(*this);

	// shift is a template constant, so this test is decided per instantiation
	if(shift > 0) scaled = scaled.sll16(shift);

	return scaled.mul16hrs(f);

#else

	return GSVector4i(*this).sll16(shift + 1).mul16hs(f);

#endif
}
GSVector4i eq8(const GSVector4i& v) const
{
return GSVector4i(_mm_cmpeq_epi8(m, v.m));