This commit is contained in:
gabest
2008-08-17 13:51:32 +00:00
parent 59f577acff
commit b67a8568bd
14 changed files with 1665 additions and 1358 deletions

View File

@@ -835,6 +835,207 @@ public:
#endif
}
// Reads one 4-bit-per-pixel block from `src` and writes it to `dst` as one byte
// per pixel (every stored byte is a nibble value, 0..15).
//
// Output layout, as produced by the stores below: each pass of the outer loop
// emits 8 rows (four groups of two rows) and there are two passes, so 16 rows
// are written; every row receives two 16-byte stores at byte offsets 0 and 16,
// i.e. 32 bytes per row. `dstpitch` is the byte distance between two rows.
//
// NOTE: only the SSE2+ path exists. When _M_SSE < 0x200 the body is empty
// (see the TODO) and `dst` is left untouched.
// NOTE(review): store<true> presumably selects an aligned store, which would
// require `dst` and `dstpitch` to be 16-byte aligned — confirm in GSVector4i.
// NOTE(review): v.andnot(mask) is assumed to compute v & ~mask (the high
// nibbles) — confirm in GSVector4i.
__forceinline static void ReadBlock4P(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
#if _M_SSE >= 0x200
const GSVector4i* s = (const GSVector4i*)src;
GSVector4i v0, v1, v2, v3;
// Selects the low nibble of every byte.
GSVector4i mask(0x0f0f0f0f);
// Each pass consumes eight 16-byte vectors (s[i * 8 + 0..7]).
for(int i = 0; i < 2; i++)
{
// col 0, 2
v0 = s[i * 8 + 0];
v1 = s[i * 8 + 1];
v2 = s[i * 8 + 2];
v3 = s[i * 8 + 3];
// Reorder the four vectors; note the third step passes v2 and v1 swapped.
GSVector4i::sw8(v0, v1, v2, v3);
GSVector4i::sw16(v0, v1, v2, v3);
GSVector4i::sw8(v0, v2, v1, v3);
// Low nibbles: stored as-is.
GSVector4i::store<true>(&dst[dstpitch * 0 + 0], (v0 & mask));
GSVector4i::store<true>(&dst[dstpitch * 0 + 16], (v1 & mask));
GSVector4i::store<true>(&dst[dstpitch * 1 + 0], (v2 & mask));
GSVector4i::store<true>(&dst[dstpitch * 1 + 16], (v3 & mask));
dst += dstpitch * 2;
// High nibbles: swizzled with yxwz(), then shifted down into the low nibble.
GSVector4i::store<true>(&dst[dstpitch * 0 + 0], (v0.andnot(mask)).yxwz() >> 4);
GSVector4i::store<true>(&dst[dstpitch * 0 + 16], (v1.andnot(mask)).yxwz() >> 4);
GSVector4i::store<true>(&dst[dstpitch * 1 + 0], (v2.andnot(mask)).yxwz() >> 4);
GSVector4i::store<true>(&dst[dstpitch * 1 + 16], (v3.andnot(mask)).yxwz() >> 4);
dst += dstpitch * 2;
// col 1, 3
v0 = s[i * 8 + 4];
v1 = s[i * 8 + 5];
v2 = s[i * 8 + 6];
v3 = s[i * 8 + 7];
GSVector4i::sw8(v0, v1, v2, v3);
GSVector4i::sw16(v0, v1, v2, v3);
GSVector4i::sw8(v0, v2, v1, v3);
// For these columns the swizzle is mirrored: the low nibbles get yxwz(),
// the high nibbles are stored without it.
GSVector4i::store<true>(&dst[dstpitch * 0 + 0], (v0 & mask).yxwz());
GSVector4i::store<true>(&dst[dstpitch * 0 + 16], (v1 & mask).yxwz());
GSVector4i::store<true>(&dst[dstpitch * 1 + 0], (v2 & mask).yxwz());
GSVector4i::store<true>(&dst[dstpitch * 1 + 16], (v3 & mask).yxwz());
dst += dstpitch * 2;
GSVector4i::store<true>(&dst[dstpitch * 0 + 0], (v0.andnot(mask)) >> 4);
GSVector4i::store<true>(&dst[dstpitch * 0 + 16], (v1.andnot(mask)) >> 4);
GSVector4i::store<true>(&dst[dstpitch * 1 + 0], (v2.andnot(mask)) >> 4);
GSVector4i::store<true>(&dst[dstpitch * 1 + 16], (v3.andnot(mask)) >> 4);
dst += dstpitch * 2;
}
#else
// TODO
#endif
}
// Copies bits 24..31 of every 32-bit pixel of one block into `dst`, one byte per pixel.
// Eight rows of eight bytes are written; `dstpitch` is the byte distance between rows.
__forceinline static void ReadBlock8HP(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* column = (const GSVector4i*)src;

	// Four vectors are consumed per pass and yield two output rows.
	for(int pass = 0; pass < 4; pass++, column += 4)
	{
		GSVector4i a = column[0];
		GSVector4i b = column[1];
		GSVector4i c = column[2];
		GSVector4i d = column[3];

		GSVector4i::sw64(a, b, c, d);

		// Shift the top byte of each dword down, then narrow 32 -> 16 -> 8 bits.
		GSVector4i lower = (a >> 24).ps32(b >> 24);
		GSVector4i upper = (c >> 24).ps32(d >> 24);
		GSVector4i bytes = lower.pu16(upper);

		GSVector4i::storel(dst, bytes);
		dst += dstpitch;

		GSVector4i::storeh(dst, bytes);
		dst += dstpitch;
	}

	#else

	// Scalar path: columnTable32 supplies the source index of each output pixel.
	const DWORD* order = &columnTable32[0][0];

	for(int row = 0; row < 8; row++)
	{
		for(int col = 0; col < 8; col++)
		{
			((BYTE*)dst)[col] = ((DWORD*)src)[order[col]] >> 24;
		}

		order += 8;
		dst += dstpitch;
	}

	#endif
}
// Copies bits 24..27 (the low nibble of the top byte) of every 32-bit pixel of one
// block into `dst`, one byte per pixel. Eight rows of eight bytes are written;
// `dstpitch` is the byte distance between rows.
__forceinline static void ReadBlock4HLP(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* column = (const GSVector4i*)src;

	// Keeps only the low nibble of each packed byte.
	GSVector4i nibble(0x0f0f0f0f);

	// Four vectors are consumed per pass and yield two output rows.
	for(int pass = 0; pass < 4; pass++, column += 4)
	{
		GSVector4i a = column[0];
		GSVector4i b = column[1];
		GSVector4i c = column[2];
		GSVector4i d = column[3];

		GSVector4i::sw64(a, b, c, d);

		// Shift the top byte of each dword down, narrow 32 -> 16 -> 8 bits, then mask.
		GSVector4i lower = (a >> 24).ps32(b >> 24);
		GSVector4i upper = (c >> 24).ps32(d >> 24);
		GSVector4i bytes = lower.pu16(upper) & nibble;

		GSVector4i::storel(dst, bytes);
		dst += dstpitch;

		GSVector4i::storeh(dst, bytes);
		dst += dstpitch;
	}

	#else

	// Scalar path: columnTable32 supplies the source index of each output pixel.
	const DWORD* order = &columnTable32[0][0];

	for(int row = 0; row < 8; row++)
	{
		for(int col = 0; col < 8; col++)
		{
			((BYTE*)dst)[col] = (((DWORD*)src)[order[col]] >> 24) & 0xf;
		}

		order += 8;
		dst += dstpitch;
	}

	#endif
}
// Copies bits 28..31 (the top nibble) of every 32-bit pixel of one block into `dst`,
// one byte per pixel. Eight rows of eight bytes are written; `dstpitch` is the byte
// distance between rows.
__forceinline static void ReadBlock4HHP(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* column = (const GSVector4i*)src;

	// Four vectors are consumed per pass and yield two output rows.
	for(int pass = 0; pass < 4; pass++, column += 4)
	{
		GSVector4i a = column[0];
		GSVector4i b = column[1];
		GSVector4i c = column[2];
		GSVector4i d = column[3];

		GSVector4i::sw64(a, b, c, d);

		// Shift the top nibble of each dword down, then narrow 32 -> 16 -> 8 bits.
		GSVector4i lower = (a >> 28).ps32(b >> 28);
		GSVector4i upper = (c >> 28).ps32(d >> 28);
		GSVector4i bytes = lower.pu16(upper);

		GSVector4i::storel(dst, bytes);
		dst += dstpitch;

		GSVector4i::storeh(dst, bytes);
		dst += dstpitch;
	}

	#else

	// Scalar path: columnTable32 supplies the source index of each output pixel.
	const DWORD* order = &columnTable32[0][0];

	for(int row = 0; row < 8; row++)
	{
		for(int col = 0; col < 8; col++)
		{
			((BYTE*)dst)[col] = ((DWORD*)src)[order[col]] >> 28;
		}

		order += 8;
		dst += dstpitch;
	}

	#endif
}
static void UnpackBlock24(const BYTE* RESTRICT src, int srcpitch, DWORD* RESTRICT dst)
{
#if _M_SSE >= 0x200

View File

@@ -47,8 +47,8 @@ public:
{
GSVector4i dx10;
GSVector4 dx9;
GSVector4 hw;
GSVector4 sw;
GSVector4 in;
GSVector4 ex;
} scissor;
GSDrawingContext()
@@ -83,13 +83,13 @@ public:
scissor.dx9 = GSVector4(scissor.dx10);
scissor.hw = GSVector4(
scissor.in = GSVector4(
(int)SCISSOR.SCAX0,
(int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1 + 1,
(int)SCISSOR.SCAY1 + 1);
scissor.sw = GSVector4i(
scissor.ex = GSVector4i(
(int)SCISSOR.SCAX0,
(int)SCISSOR.SCAY0,
(int)SCISSOR.SCAX1,

View File

@@ -166,6 +166,7 @@ GSLocalMemory::GSLocalMemory()
m_psm[i].ri = &GSLocalMemory::ReadImageX; // TODO
m_psm[i].rtx = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxNP = &GSLocalMemory::ReadTexture32;
m_psm[i].rtxP = &GSLocalMemory::ReadTexture32;
m_psm[i].bpp = m_psm[i].trbpp = 32;
m_psm[i].pal = 0;
m_psm[i].bs = CSize(8, 8);
@@ -341,6 +342,12 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMZ16].rtxNP = &GSLocalMemory::ReadTexture16ZNP;
m_psm[PSM_PSMZ16S].rtxNP = &GSLocalMemory::ReadTexture16SZNP;
m_psm[PSM_PSMT8].rtxP = &GSLocalMemory::ReadTexture8P;
m_psm[PSM_PSMT4].rtxP = &GSLocalMemory::ReadTexture4P;
m_psm[PSM_PSMT8H].rtxP = &GSLocalMemory::ReadTexture8HP;
m_psm[PSM_PSMT4HL].rtxP = &GSLocalMemory::ReadTexture4HLP;
m_psm[PSM_PSMT4HH].rtxP = &GSLocalMemory::ReadTexture4HHP;
m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256;
m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16;
@@ -381,7 +388,7 @@ GSLocalMemory::~GSLocalMemory()
VirtualFree(m_vm8, 0, MEM_RELEASE);
}
bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD bw)
bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp, DWORD bw)
{
const psm_t& tbl = m_psm[psm];
@@ -404,31 +411,39 @@ bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD
CRect clip;
clip.left = (r.left + (w-1)) & ~(w-1);
clip.top = (r.top + (h-1)) & ~(h-1);
clip.right = r.right & ~(w-1);
clip.bottom = r.bottom & ~(h-1);
clip.left = (r.x + (w - 1)) & ~(w - 1);
clip.top = (r.y + (h - 1)) & ~(h - 1);
clip.right = r.z & ~(w - 1);
clip.bottom = r.w & ~(h - 1);
for(int y = r.top; y < clip.top; y++)
for(int y = r.y; y < clip.top; y++)
{
for(int x = r.left; x < r.right; x++)
for(int x = r.x; x < r.z; x++)
{
(this->*wp)(x, y, c, bp, bw);
}
}
if(r.left < clip.left || clip.right < r.right)
for(int y = clip.bottom; y < r.w; y++)
{
for(int x = r.x; x < r.z; x++)
{
(this->*wp)(x, y, c, bp, bw);
}
}
if(r.x < clip.left || clip.right < r.z)
{
for(int y = clip.top; y < clip.bottom; y += h)
{
for(int ys = y, ye = y + h; ys < ye; ys++)
{
for(int x = r.left; x < clip.left; x++)
for(int x = r.x; x < clip.left; x++)
{
(this->*wp)(x, ys, c, bp, bw);
}
for(int x = clip.right; x < r.right; x++)
for(int x = clip.right; x < r.z; x++)
{
(this->*wp)(x, ys, c, bp, bw);
}
@@ -524,14 +539,6 @@ bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD
#endif
}
for(int y = clip.bottom; y < r.bottom; y++)
{
for(int x = r.left; x < r.right; x++)
{
(this->*wp)(x, y, c, bp, bw);
}
}
return true;
}
@@ -1876,6 +1883,53 @@ void GSLocalMemory::ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, con
}
}
// 32/8
// Reads texture data block by block into `dst` by calling ReadBlock8<true> for
// every block visited by the FOREACH_BLOCK macros. The source of each block is
// m_vm8 at BlockAddress8(x, y, bp, bw).
// `x`, `y`, `bp` and `bw` are not declared in this function; presumably they are
// introduced by FOREACH_BLOCK_START(16, 16, 8) from `r` and `TEX0` — confirm
// against the macro definition. TEXA is not referenced in the visible body.
void GSLocalMemory::ReadTexture8P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(16, 16, 8)
{
ReadBlock8<true>(&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads texture data block by block into `dst` by calling ReadBlock4P for every
// block visited by the FOREACH_BLOCK macros.
// The value returned by BlockAddress4 is shifted right by one before indexing the
// byte array m_vm8 — presumably because it counts 4-bit pixels, two per byte
// (TODO confirm).
// `x`, `y`, `bp` and `bw` are not declared in this function; presumably they are
// introduced by FOREACH_BLOCK_START(32, 16, 8) from `r` and `TEX0` — confirm
// against the macro definition. TEXA is not referenced in the visible body.
void GSLocalMemory::ReadTexture4P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(32, 16, 8)
{
ReadBlock4P(&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads texture data block by block into `dst` by calling ReadBlock8HP for every
// block visited by the FOREACH_BLOCK macros. The source of each block is the
// 32-bit view m_vm32 at BlockAddress32(x, y, bp, bw), passed on as a byte pointer.
// `x`, `y`, `bp` and `bw` are not declared in this function; presumably they are
// introduced by FOREACH_BLOCK_START(8, 8, 8) from `r` and `TEX0` — confirm
// against the macro definition. TEXA is not referenced in the visible body.
void GSLocalMemory::ReadTexture8HP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(8, 8, 8)
{
ReadBlock8HP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads texture data block by block into `dst` by calling ReadBlock4HLP for every
// block visited by the FOREACH_BLOCK macros. The source of each block is the
// 32-bit view m_vm32 at BlockAddress32(x, y, bp, bw), passed on as a byte pointer.
// `x`, `y`, `bp` and `bw` are not declared in this function; presumably they are
// introduced by FOREACH_BLOCK_START(8, 8, 8) from `r` and `TEX0` — confirm
// against the macro definition. TEXA is not referenced in the visible body.
void GSLocalMemory::ReadTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(8, 8, 8)
{
ReadBlock4HLP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Reads texture data block by block into `dst` by calling ReadBlock4HHP for every
// block visited by the FOREACH_BLOCK macros. The source of each block is the
// 32-bit view m_vm32 at BlockAddress32(x, y, bp, bw), passed on as a byte pointer.
// `x`, `y`, `bp` and `bw` are not declared in this function; presumably they are
// introduced by FOREACH_BLOCK_START(8, 8, 8) from `r` and `TEX0` — confirm
// against the macro definition. TEXA is not referenced in the visible body.
void GSLocalMemory::ReadTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(8, 8, 8)
{
ReadBlock4HHP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
//
template<typename T>

View File

@@ -59,7 +59,7 @@ public:
writeFrameAddr wfa;
writeImage wi;
readImage ri;
readTexture rtx, rtxNP;
readTexture rtx, rtxNP, rtxP;
DWORD bpp, pal, trbpp;
CSize bs, pgs;
int* rowOffset[8];
@@ -547,21 +547,6 @@ public:
WritePixel16(addr, (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
}
// Stores the full 32-bit value `c` at element index `addr` of `dst`.
__forceinline void WritePixel32(DWORD* dst, DWORD addr, DWORD c)
{
	DWORD* pixel = dst + addr;

	*pixel = c;
}
// Replaces the low 24 bits of the element at index `addr` of `dst` with the low
// 24 bits of `c`; the existing top byte is preserved.
__forceinline void WritePixel24(DWORD* dst, DWORD addr, DWORD c)
{
	DWORD* pixel = dst + addr;

	const DWORD kept = *pixel & 0xff000000;
	const DWORD incoming = c & 0x00ffffff;

	*pixel = kept | incoming;
}
// Stores `c`, truncated to WORD, at element index `addr` of `dst`.
__forceinline void WritePixel16(WORD* dst, DWORD addr, DWORD c)
{
	WORD* pixel = dst + addr;

	*pixel = (WORD)c;
}
__forceinline void WritePixel32(int x, int y, DWORD c, DWORD bp, DWORD bw)
{
WritePixel32(PixelAddress32(x, y, bp, bw), c);
@@ -911,244 +896,9 @@ public:
}
}
// Reads up to four frame buffer pixels, one per 32-bit lane of `addr`, and returns
// them as 32-bit colors.
//
// psm selects the storage format:
//   0 - 32-bit pixels, returned unchanged.
//   1 - 32-bit storage; the top byte is replaced (masked with 0x00ffffff, then
//       OR-ed with 0x80000000).
//   2 - 16-bit pixels, expanded: bit 15 -> bit 31, bits 10..14 -> 19..23,
//       bits 5..9 -> 11..15, bits 0..4 -> 3..7.
// Any other value asserts and yields a zero vector.
//
// With SSE4.1 (_M_SSE >= 0x401) the four values are gathered by the vector helper;
// otherwise they are fetched one lane at a time.
__forceinline GSVector4i ReadFrameX(int psm, const GSVector4i& addr) const
{
	// Only the result vector is needed; the previously declared r, g, b, a were never used.
	GSVector4i c;

	switch(psm)
	{
	case 0:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(m_vm32);
		#else
		c = GSVector4i(
			(int)ReadPixel32(addr.u32[0]),
			(int)ReadPixel32(addr.u32[1]),
			(int)ReadPixel32(addr.u32[2]),
			(int)ReadPixel32(addr.u32[3]));
		#endif
		break;
	case 1:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(m_vm32);
		#else
		c = GSVector4i(
			(int)ReadPixel32(addr.u32[0]),
			(int)ReadPixel32(addr.u32[1]),
			(int)ReadPixel32(addr.u32[2]),
			(int)ReadPixel32(addr.u32[3]));
		#endif
		// Discard the stored top byte and force bit 31 on.
		c = (c & GSVector4i::x00ffffff(addr)) | GSVector4i::x80000000(addr);
		break;
	case 2:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(m_vm16);
		#else
		c = GSVector4i(
			(int)ReadPixel16(addr.u32[0]),
			(int)ReadPixel16(addr.u32[1]),
			(int)ReadPixel16(addr.u32[2]),
			(int)ReadPixel16(addr.u32[3]));
		#endif
		// Spread the 1/5/5/5 bit fields to the top of each 8-bit channel.
		c = ((c & 0x8000) << 16) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
		break;
	default:
		ASSERT(0);
		c = GSVector4i::zero();
	}

	return c;
}
// Reads up to four depth values, one per 32-bit lane of `addr`.
// psm 0: 32-bit values; psm 1: 32-bit storage masked to the low 24 bits;
// psm 2: 16-bit values. Any other psm asserts and yields a zero vector.
__forceinline GSVector4i ReadZBufX(int psm, const GSVector4i& addr) const
{
	if(psm == 0 || psm == 1)
	{
		// Both formats fetch whole dwords; format 1 then drops the top byte.
		#if _M_SSE >= 0x401
		GSVector4i depth = addr.gather32_32(m_vm32);
		#else
		GSVector4i depth(
			(int)ReadPixel32(addr.u32[0]),
			(int)ReadPixel32(addr.u32[1]),
			(int)ReadPixel32(addr.u32[2]),
			(int)ReadPixel32(addr.u32[3]));
		#endif

		if(psm == 1)
		{
			depth = depth & GSVector4i::x00ffffff(addr);
		}

		return depth;
	}

	if(psm == 2)
	{
		#if _M_SSE >= 0x401
		GSVector4i depth = addr.gather32_32(m_vm16);
		#else
		GSVector4i depth(
			(int)ReadPixel16(addr.u32[0]),
			(int)ReadPixel16(addr.u32[1]),
			(int)ReadPixel16(addr.u32[2]),
			(int)ReadPixel16(addr.u32[3]));
		#endif

		return depth;
	}

	// Unknown format.
	ASSERT(0);

	return GSVector4i::zero();
}
// Writes up to `pixels` color values and depth values, lane by lane.
//
//   fpsm / zpsm - storage format of the color / depth target: 0 = 32-bit store,
//                 1 = 24-bit store (top byte of the destination preserved),
//                 2 = 16-bit store. Any other value matches no case, so nothing
//                 is written for that target.
//   fa / za     - per-lane element indices into the 32-bit or 16-bit memory view.
//   fm / zm     - per-lane masks; a lane is skipped only when its mask dword is
//                 exactly 0xffffffff.
//   f / z       - per-lane values to store. When fpsm == 2, `f` is first packed
//                 from 8-bit channels down to a 16-bit 1/5/5/5 value.
//   pixels      - number of lanes to process. Lane 0 is always processed, in both
//                 code paths, even if `pixels` is less than 1.
//
// The SSE4.1 path is unrolled by hand because extract32<N>/extract16<N> take the
// lane as a template argument.
__forceinline void WriteFrameAndZBufX(
int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f,
int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z,
int pixels)
{
// FIXME: compiler problem or not enough xmm regs in x86 mode to store the address regs (fa, za)
DWORD* RESTRICT vm32 = m_vm32;
WORD* RESTRICT vm16 = m_vm16;
GSVector4i c = f;
if(fpsm == 2)
{
// Pack to 16 bits: keep the top 5 bits of each color channel and bit 31.
GSVector4i rb = c & 0x00f800f8;
GSVector4i ga = c & 0x8000f800;
c = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3);
}
#if _M_SSE >= 0x401
// Lane 0
if(fm.extract32<0>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[0], c.extract32<0>()); break;
case 1: WritePixel24(vm32, fa.u32[0], c.extract32<0>()); break;
case 2: WritePixel16(vm16, fa.u32[0], c.extract16<0 * 2>()); break;
}
}
if(zm.extract32<0>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[0], z.extract32<0>()); break;
case 1: WritePixel24(vm32, za.u32[0], z.extract32<0>()); break;
case 2: WritePixel16(vm16, za.u32[0], z.extract16<0 * 2>()); break;
}
}
if(pixels <= 1) return;
// Lane 1
if(fm.extract32<1>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[1], c.extract32<1>()); break;
case 1: WritePixel24(vm32, fa.u32[1], c.extract32<1>()); break;
case 2: WritePixel16(vm16, fa.u32[1], c.extract16<1 * 2>()); break;
}
}
if(zm.extract32<1>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[1], z.extract32<1>()); break;
case 1: WritePixel24(vm32, za.u32[1], z.extract32<1>()); break;
case 2: WritePixel16(vm16, za.u32[1], z.extract16<1 * 2>()); break;
}
}
if(pixels <= 2) return;
// Lane 2
if(fm.extract32<2>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[2], c.extract32<2>()); break;
case 1: WritePixel24(vm32, fa.u32[2], c.extract32<2>()); break;
case 2: WritePixel16(vm16, fa.u32[2], c.extract16<2 * 2>()); break;
}
}
if(zm.extract32<2>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[2], z.extract32<2>()); break;
case 1: WritePixel24(vm32, za.u32[2], z.extract32<2>()); break;
case 2: WritePixel16(vm16, za.u32[2], z.extract16<2 * 2>()); break;
}
}
if(pixels <= 3) return;
// Lane 3
if(fm.extract32<3>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[3], c.extract32<3>()); break;
case 1: WritePixel24(vm32, fa.u32[3], c.extract32<3>()); break;
case 2: WritePixel16(vm16, fa.u32[3], c.extract16<3 * 2>()); break;
}
}
if(zm.extract32<3>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[3], z.extract32<3>()); break;
case 1: WritePixel24(vm32, za.u32[3], z.extract32<3>()); break;
case 2: WritePixel16(vm16, za.u32[3], z.extract16<3 * 2>()); break;
}
}
#else
// Generic path: same per-lane logic as above, driven by a loop.
int i = 0;
do
{
if(fm.u32[i] != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[i], c.u32[i]); break;
case 1: WritePixel24(vm32, fa.u32[i], c.u32[i]); break;
// u16[i * 2] is the low word of 32-bit lane i.
case 2: WritePixel16(vm16, fa.u32[i], c.u16[i * 2]); break;
}
}
if(zm.u32[i] != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[i], z.u32[i]); break;
case 1: WritePixel24(vm32, za.u32[i], z.u32[i]); break;
case 2: WritePixel16(vm16, za.u32[i], z.u16[i * 2]); break;
}
}
}
while(++i < pixels);
#endif
}
// FillRect
bool FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD bw);
bool FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp, DWORD bw);
//
@@ -1212,6 +962,14 @@ public:
void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
void ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
// 32/8
void ReadTexture8P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8HP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
//
static DWORD m_xtbl[1024], m_ytbl[1024];

View File

@@ -86,7 +86,7 @@ GSRasterizer::~GSRasterizer()
m_comap.RemoveAll();
}
int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
int GSRasterizer::Draw(Vertex* vertices, int count, const GSTextureCacheSW::GSTexture* texture)
{
GSDrawingEnvironment& env = m_state->m_env;
GSDrawingContext* context = m_state->m_context;
@@ -101,15 +101,21 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
// m_scissor
m_scissor.left = max(context->SCISSOR.SCAX0, 0);
m_scissor.top = max(context->SCISSOR.SCAY0, 0);
m_scissor.right = min(context->SCISSOR.SCAX1 + 1, context->FRAME.FBW * 64);
m_scissor.bottom = min(context->SCISSOR.SCAY1 + 1, 4096);
m_scissor = context->scissor.in;
// TODO: find a game that overflows and check which one is the right behaviour
m_scissor.z = min(m_scissor.z, context->FRAME.FBW * 64);
// m_sel
m_sel.dw = 0;
if(PRIM->AA1)
{
// TODO: automatic alpha blending (ABE=1, A=0 B=1 C=0 D=1)
}
m_sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM);
m_sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);
m_sel.ztst = context->TEST.ZTE && context->TEST.ZTST > 1 ? context->TEST.ZTST : context->ZBUF.ZMSK ? 0 : 1;
@@ -159,6 +165,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
m_sel.pabe = PRIM->ABE ? env.PABE.PABE : 0;
m_sel.rfb = m_sel.date || m_sel.abe != 255 || m_sel.atst != 1 && m_sel.afail == 3 || context->FRAME.FBMSK != 0 && context->FRAME.FBMSK != 0xffffffff;
m_sel.wzb = context->DepthWrite();
m_sel.tlu = PRIM->TME && GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0 ? 1 : 0;
m_dsf = m_ds[m_sel.fpsm][m_sel.zpsm][m_sel.ztst][m_sel.iip];
@@ -212,7 +219,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
SetupColumnOffset();
m_slenv.steps = 0;
m_slenv.rtx = GSLocalMemory::m_psm[context->TEX0.PSM].rtx;
m_slenv.vm = m_state->m_mem.m_vm32;
m_slenv.fbr = m_fbco->row;
m_slenv.zbr = m_zbco->row;
m_slenv.fbc = m_fbco->col;
@@ -225,9 +232,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
m_slenv.aref = GSVector4i((int)context->TEST.AREF + (m_sel.atst == ATST_LESS ? -1 : m_sel.atst == ATST_GREATER ? +1 : 0));
m_slenv.afix = GSVector4((float)(int)context->ALPHA.FIX);
m_slenv.afix2 = m_slenv.afix * (2.0f / 256);
m_slenv.f.r = GSVector4((float)(int)env.FOGCOL.FCR);
m_slenv.f.g = GSVector4((float)(int)env.FOGCOL.FCG);
m_slenv.f.b = GSVector4((float)(int)env.FOGCOL.FCB);
m_slenv.fc = GSVector4((DWORD)env.FOGCOL.ai32[0]);
if(m_sel.fpsm == 1)
{
@@ -236,8 +241,9 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
if(PRIM->TME)
{
m_texture = texture;
m_tw = max(3, context->TEX0.TW);
m_slenv.tex = texture->m_buff;
m_slenv.pal = m_state->m_mem.m_clut;
m_slenv.tw = texture->m_tw;
short tw = (short)(1 << context->TEX0.TW);
short th = (short)(1 << context->TEX0.TH);
@@ -297,8 +303,6 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
m_slenv.t.min = m_slenv.t.min.xxxxl().xxxxh();
m_slenv.t.max = m_slenv.t.max.xxxxl().xxxxh();
m_slenv.t.mask = m_slenv.t.mask.xxzz();
// m_tw = (int)max(context->TEX0.TW, TEXTURE_CACHE_WIDTH);
}
//
@@ -355,7 +359,7 @@ void GSRasterizer::DrawPoint(Vertex* v)
GSVector4i p(v->p);
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
if(m_scissor.x <= p.x && p.x < m_scissor.z && m_scissor.y <= p.y && p.y < m_scissor.w)
{
if((p.y % m_threads) == m_id)
{
@@ -481,8 +485,8 @@ void GSRasterizer::DrawTriangleSection(Vertex& l, const Vertex& dl, GSVector4& r
int top = tb.z;
int bottom = tb.w;
if(top < m_scissor.top) top = m_scissor.top;
if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
if(top < m_scissor.y) top = m_scissor.y;
if(bottom > m_scissor.w) bottom = m_scissor.w;
if(top < bottom)
{
@@ -520,8 +524,8 @@ if(scanmsk >= 0)
int left = lr.x;
int right = lr.y;
if(left < m_scissor.left) left = m_scissor.left;
if(right > m_scissor.right) right = m_scissor.right;
if(left < m_scissor.x) left = m_scissor.x;
if(right > m_scissor.z) right = m_scissor.z;
if(right > left)
{
@@ -554,25 +558,35 @@ void GSRasterizer::DrawSprite(Vertex* vertices)
v[1].p = vertices[1].p.blend8(vertices[0].p, mask);
v[1].t = vertices[1].t.blend8(vertices[0].t, mask);
GSVector4i tlbr(v[0].p.xyxy(v[1].p).ceil());
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
int top = tlbr.y;
int bottom = tlbr.w;
int& top = r.y;
int& bottom = r.w;
if(top < m_scissor.top) top = m_scissor.top;
if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
int& left = r.x;
int& right = r.z;
#if _M_SSE >= 0x401
r = r.sat_i32(m_scissor);
if((r < r.zwzw()).mask() != 0x00ff) return;
#else
if(top < m_scissor.y) top = m_scissor.y;
if(bottom > m_scissor.w) bottom = m_scissor.w;
if(top >= bottom) return;
int left = tlbr.x;
int right = tlbr.z;
if(left < m_scissor.left) left = m_scissor.left;
if(right > m_scissor.right) right = m_scissor.right;
if(left < m_scissor.x) left = m_scissor.x;
if(right > m_scissor.z) right = m_scissor.z;
if(left >= right) return;
#endif
Vertex scan = v[0];
if(DrawSolidRect(left, top, right, bottom, scan))
if(DrawSolidRect(r, scan))
{
return;
}
@@ -618,9 +632,9 @@ void GSRasterizer::DrawSprite(Vertex* vertices)
}
}
bool GSRasterizer::DrawSolidRect(int left, int top, int right, int bottom, const Vertex& v)
bool GSRasterizer::DrawSolidRect(const GSVector4i& r, const Vertex& v)
{
if(left >= right || top >= bottom || !m_solidrect)
if(r.x >= r.z || r.y >= r.w || !m_solidrect)
{
return false;
}
@@ -630,10 +644,8 @@ bool GSRasterizer::DrawSolidRect(int left, int top, int right, int bottom, const
return true;
}
ASSERT(top >= 0);
ASSERT(bottom >= 0);
CRect r(left, top, right, bottom);
ASSERT(r.y >= 0);
ASSERT(r.w >= 0);
GSDrawingContext* context = m_state->m_context;
@@ -827,9 +839,6 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
continue;
}
// DWORD mask = (DWORD)(((int)steps - 4) >> 31);
// int pixels = (steps & mask) | (4 & ~mask);
int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4)));
GSVector4 c[12];
@@ -853,7 +862,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
}
}
SampleTexture(ztst, test, pixels, m_sel.ltf, u, v, c);
SampleTexture(ztst, test, pixels, m_sel.ltf, m_sel.tlu, u, v, c);
}
AlphaTFX(m_sel.tfx, m_sel.tcc, a, c[3]);
@@ -879,7 +888,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
if(m_sel.rfb)
{
d = m_state->m_mem.ReadFrameX(fpsm == 1 ? 0 : fpsm, fa);
d = ReadFrameX(fpsm == 1 ? 0 : fpsm, fa);
if(fpsm != 1 && m_sel.date)
{
@@ -957,7 +966,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
s = s.blend(d, fm);
}
m_state->m_mem.WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels);
WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels);
}
while(0);
@@ -989,8 +998,12 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
}
}
void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, const GSVector4& u, const GSVector4& v, GSVector4* c)
void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, DWORD tlu, const GSVector4& u, const GSVector4& v, GSVector4* c)
{
const void* RESTRICT tex = m_slenv.tex;
const DWORD* RESTRICT pal = m_slenv.pal;
const DWORD tw = m_slenv.tw;
if(ltf)
{
GSVector4 uf = u.floor();
@@ -1006,26 +1019,51 @@ void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels,
int i = 0;
do
if(tlu)
{
if(ztst > 1 && test.u32[i])
do
{
continue;
if(ztst > 1 && test.u32[i])
{
continue;
}
GSVector4 c00 = GSVector4(pal[((const BYTE*)tex)[(uv0.u16[i + 4] << tw) + uv0.u16[i]]]);
GSVector4 c01 = GSVector4(pal[((const BYTE*)tex)[(uv0.u16[i + 4] << tw) + uv1.u16[i]]]);
GSVector4 c10 = GSVector4(pal[((const BYTE*)tex)[(uv1.u16[i + 4] << tw) + uv0.u16[i]]]);
GSVector4 c11 = GSVector4(pal[((const BYTE*)tex)[(uv1.u16[i + 4] << tw) + uv1.u16[i]]]);
c00 = c00.lerp(c01, uff.v[i]);
c10 = c10.lerp(c11, uff.v[i]);
c00 = c00.lerp(c10, vff.v[i]);
c[i] = c00;
}
GSVector4 c00(ReadTexel(uv0.u16[i], uv0.u16[i + 4]));
GSVector4 c01(ReadTexel(uv1.u16[i], uv0.u16[i + 4]));
GSVector4 c10(ReadTexel(uv0.u16[i], uv1.u16[i + 4]));
GSVector4 c11(ReadTexel(uv1.u16[i], uv1.u16[i + 4]));
c00 = c00.lerp(c01, uff.v[i]);
c10 = c10.lerp(c11, uff.v[i]);
c00 = c00.lerp(c10, vff.v[i]);
c[i] = c00;
while(++i < pixels);
}
else
{
do
{
if(ztst > 1 && test.u32[i])
{
continue;
}
GSVector4 c00 = GSVector4(((const DWORD*)tex)[(uv0.u16[i + 4] << tw) + uv0.u16[i]]);
GSVector4 c01 = GSVector4(((const DWORD*)tex)[(uv0.u16[i + 4] << tw) + uv1.u16[i]]);
GSVector4 c10 = GSVector4(((const DWORD*)tex)[(uv1.u16[i + 4] << tw) + uv0.u16[i]]);
GSVector4 c11 = GSVector4(((const DWORD*)tex)[(uv1.u16[i + 4] << tw) + uv1.u16[i]]);
c00 = c00.lerp(c01, uff.v[i]);
c10 = c10.lerp(c11, uff.v[i]);
c00 = c00.lerp(c10, vff.v[i]);
c[i] = c00;
}
while(++i < pixels);
}
while(++i < pixels);
GSVector4::transpose(c[0], c[1], c[2], c[3]);
}
@@ -1037,16 +1075,32 @@ void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels,
int i = 0;
do
if(tlu)
{
if(ztst > 1 && test.u32[i])
do
{
continue;
}
if(ztst > 1 && test.u32[i])
{
continue;
}
c00.u32[i] = ReadTexel(uv.u16[i], uv.u16[i + 4]);
c00.u32[i] = pal[((const BYTE*)tex)[(uv.u16[i + 4] << tw) + uv.u16[i]]];
}
while(++i < pixels);
}
else
{
do
{
if(ztst > 1 && test.u32[i])
{
continue;
}
c00.u32[i] = ((const DWORD*)tex)[(uv.u16[i + 4] << tw) + uv.u16[i]];
}
while(++i < pixels);
}
while(++i < pixels);
// GSVector4::expand(c00, c[0], c[1], c[2], c[3]);
@@ -1105,16 +1159,18 @@ void GSRasterizer::AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4& af, GSVector4
void GSRasterizer::Fog(const GSVector4& f, GSVector4& r, GSVector4& g, GSVector4& b)
{
r = m_slenv.f.r.lerp(r, f);
g = m_slenv.f.g.lerp(g, f);
b = m_slenv.f.b.lerp(b, f);
GSVector4 fc = m_slenv.fc;
r = fc.xxxx().lerp(r, f);
g = fc.yyyy().lerp(g, f);
b = fc.zzzz().lerp(b, f);
}
bool GSRasterizer::TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& za, GSVector4i& test)
{
if(ztst > 1)
{
GSVector4i zd = m_state->m_mem.ReadZBufX(zpsm, za);
GSVector4i zd = ReadZBufX(zpsm, za);
GSVector4i zso = zs;
GSVector4i zdo = zd;
@@ -1192,6 +1248,277 @@ bool GSRasterizer::TestAlpha(DWORD atst, DWORD afail, const GSVector4& a, GSVect
return true;
}
// Returns the 32-bit element at index `addr` of `vm`.
DWORD GSRasterizer::ReadPixel32(DWORD* RESTRICT vm, DWORD addr)
{
	const DWORD pixel = vm[addr];

	return pixel;
}
// Returns the low 24 bits of the 32-bit element at index `addr` of `vm`.
DWORD GSRasterizer::ReadPixel24(DWORD* RESTRICT vm, DWORD addr)
{
	const DWORD pixel = vm[addr];

	return pixel & 0x00ffffff;
}
// Returns the 16-bit element at index `addr` of `vm`, widened to DWORD.
DWORD GSRasterizer::ReadPixel16(WORD* RESTRICT vm, DWORD addr)
{
	const WORD pixel = vm[addr];

	return (DWORD)pixel;
}
// Stores the full 32-bit value `c` at element index `addr` of `vm`.
void GSRasterizer::WritePixel32(DWORD* RESTRICT vm, DWORD addr, DWORD c)
{
	DWORD* pixel = vm + addr;

	*pixel = c;
}
// Replaces the low 24 bits of the element at index `addr` of `vm` with the low
// 24 bits of `c`; the existing top byte is preserved.
void GSRasterizer::WritePixel24(DWORD* RESTRICT vm, DWORD addr, DWORD c)
{
	DWORD* pixel = vm + addr;

	const DWORD kept = *pixel & 0xff000000;
	const DWORD incoming = c & 0x00ffffff;

	*pixel = kept | incoming;
}
// Stores `c`, truncated to WORD, at element index `addr` of `vm`.
void GSRasterizer::WritePixel16(WORD* RESTRICT vm, DWORD addr, DWORD c)
{
	WORD* pixel = vm + addr;

	*pixel = (WORD)c;
}
// Reads up to four frame buffer pixels, one per 32-bit lane of `addr`, from the
// memory pointed to by m_slenv.vm and returns them as 32-bit colors.
//
// psm selects the storage format:
//   0 - 32-bit pixels, returned unchanged.
//   1 - 32-bit storage; the top byte is replaced (masked with 0x00ffffff, then
//       OR-ed with 0x80000000).
//   2 - 16-bit pixels, expanded: bit 15 -> bit 31, bits 10..14 -> 19..23,
//       bits 5..9 -> 11..15, bits 0..4 -> 3..7.
// Any other value asserts and yields a zero vector.
//
// With SSE4.1 (_M_SSE >= 0x401) the four values are gathered by the vector helper;
// otherwise they are fetched one lane at a time.
GSVector4i GSRasterizer::ReadFrameX(int psm, const GSVector4i& addr) const
{
	// The same memory is viewed as 32-bit and as 16-bit elements.
	DWORD* RESTRICT vm32 = (DWORD*)m_slenv.vm;
	WORD* RESTRICT vm16 = (WORD*)m_slenv.vm;

	// Only the result vector is needed; the previously declared r, g, b, a were never used.
	GSVector4i c;

	switch(psm)
	{
	case 0:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(vm32);
		#else
		c = GSVector4i(
			ReadPixel32(vm32, addr.u32[0]),
			ReadPixel32(vm32, addr.u32[1]),
			ReadPixel32(vm32, addr.u32[2]),
			ReadPixel32(vm32, addr.u32[3]));
		#endif
		break;
	case 1:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(vm32);
		#else
		c = GSVector4i(
			ReadPixel32(vm32, addr.u32[0]),
			ReadPixel32(vm32, addr.u32[1]),
			ReadPixel32(vm32, addr.u32[2]),
			ReadPixel32(vm32, addr.u32[3]));
		#endif
		// Discard the stored top byte and force bit 31 on.
		c = (c & GSVector4i::x00ffffff(addr)) | GSVector4i::x80000000(addr);
		break;
	case 2:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(vm16);
		#else
		c = GSVector4i(
			ReadPixel16(vm16, addr.u32[0]),
			ReadPixel16(vm16, addr.u32[1]),
			ReadPixel16(vm16, addr.u32[2]),
			ReadPixel16(vm16, addr.u32[3]));
		#endif
		// Spread the 1/5/5/5 bit fields to the top of each 8-bit channel.
		c = ((c & 0x8000) << 16) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
		break;
	default:
		ASSERT(0);
		c = GSVector4i::zero();
	}

	return c;
}
// Reads up to four depth values, one per 32-bit lane of `addr`, from the memory
// pointed to by m_slenv.vm.
// psm 0: 32-bit values; psm 1: 32-bit storage masked to the low 24 bits;
// psm 2: 16-bit values. Any other psm asserts and yields a zero vector.
GSVector4i GSRasterizer::ReadZBufX(int psm, const GSVector4i& addr) const
{
	// The same memory is viewed as 32-bit and as 16-bit elements.
	DWORD* RESTRICT vm32 = (DWORD*)m_slenv.vm;
	WORD* RESTRICT vm16 = (WORD*)m_slenv.vm;

	if(psm == 0 || psm == 1)
	{
		// Both formats fetch whole dwords; format 1 then drops the top byte.
		#if _M_SSE >= 0x401
		GSVector4i depth = addr.gather32_32(vm32);
		#else
		GSVector4i depth(
			ReadPixel32(vm32, addr.u32[0]),
			ReadPixel32(vm32, addr.u32[1]),
			ReadPixel32(vm32, addr.u32[2]),
			ReadPixel32(vm32, addr.u32[3]));
		#endif

		if(psm == 1)
		{
			depth = depth & GSVector4i::x00ffffff(addr);
		}

		return depth;
	}

	if(psm == 2)
	{
		#if _M_SSE >= 0x401
		GSVector4i depth = addr.gather32_32(vm16);
		#else
		GSVector4i depth(
			ReadPixel16(vm16, addr.u32[0]),
			ReadPixel16(vm16, addr.u32[1]),
			ReadPixel16(vm16, addr.u32[2]),
			ReadPixel16(vm16, addr.u32[3]));
		#endif

		return depth;
	}

	// Unknown format.
	ASSERT(0);

	return GSVector4i::zero();
}
// Writes up to `pixels` color values and depth values, lane by lane, into the
// memory pointed to by m_slenv.vm.
//
//   fpsm / zpsm - storage format of the color / depth target: 0 = 32-bit store,
//                 1 = 24-bit store (top byte of the destination preserved),
//                 2 = 16-bit store. Any other value matches no case, so nothing
//                 is written for that target.
//   fa / za     - per-lane element indices into the 32-bit or 16-bit memory view.
//   fm / zm     - per-lane masks; a lane is skipped only when its mask dword is
//                 exactly 0xffffffff.
//   f / z       - per-lane values to store. When fpsm == 2, `f` is first packed
//                 from 8-bit channels down to a 16-bit 1/5/5/5 value.
//   pixels      - number of lanes to process. Lane 0 is always processed, in both
//                 code paths, even if `pixels` is less than 1.
//
// The SSE4.1 path is unrolled by hand because extract32<N>/extract16<N> take the
// lane as a template argument.
void GSRasterizer::WriteFrameAndZBufX(
int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f,
int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z,
int pixels)
{
// FIXME: compiler problem or not enough xmm regs in x86 mode to store the address regs (fa, za)
// The same memory is viewed as 32-bit and as 16-bit elements.
DWORD* RESTRICT vm32 = (DWORD*)m_slenv.vm;
WORD* RESTRICT vm16 = (WORD*)m_slenv.vm;
GSVector4i c = f;
if(fpsm == 2)
{
// Pack to 16 bits: keep the top 5 bits of each color channel and bit 31.
GSVector4i rb = c & 0x00f800f8;
GSVector4i ga = c & 0x8000f800;
c = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3);
}
#if _M_SSE >= 0x401
// Lane 0
if(fm.extract32<0>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[0], c.extract32<0>()); break;
case 1: WritePixel24(vm32, fa.u32[0], c.extract32<0>()); break;
case 2: WritePixel16(vm16, fa.u32[0], c.extract16<0 * 2>()); break;
}
}
if(zm.extract32<0>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[0], z.extract32<0>()); break;
case 1: WritePixel24(vm32, za.u32[0], z.extract32<0>()); break;
case 2: WritePixel16(vm16, za.u32[0], z.extract16<0 * 2>()); break;
}
}
if(pixels <= 1) return;
// Lane 1
if(fm.extract32<1>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[1], c.extract32<1>()); break;
case 1: WritePixel24(vm32, fa.u32[1], c.extract32<1>()); break;
case 2: WritePixel16(vm16, fa.u32[1], c.extract16<1 * 2>()); break;
}
}
if(zm.extract32<1>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[1], z.extract32<1>()); break;
case 1: WritePixel24(vm32, za.u32[1], z.extract32<1>()); break;
case 2: WritePixel16(vm16, za.u32[1], z.extract16<1 * 2>()); break;
}
}
if(pixels <= 2) return;
// Lane 2
if(fm.extract32<2>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[2], c.extract32<2>()); break;
case 1: WritePixel24(vm32, fa.u32[2], c.extract32<2>()); break;
case 2: WritePixel16(vm16, fa.u32[2], c.extract16<2 * 2>()); break;
}
}
if(zm.extract32<2>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[2], z.extract32<2>()); break;
case 1: WritePixel24(vm32, za.u32[2], z.extract32<2>()); break;
case 2: WritePixel16(vm16, za.u32[2], z.extract16<2 * 2>()); break;
}
}
if(pixels <= 3) return;
// Lane 3
if(fm.extract32<3>() != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[3], c.extract32<3>()); break;
case 1: WritePixel24(vm32, fa.u32[3], c.extract32<3>()); break;
case 2: WritePixel16(vm16, fa.u32[3], c.extract16<3 * 2>()); break;
}
}
if(zm.extract32<3>() != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[3], z.extract32<3>()); break;
case 1: WritePixel24(vm32, za.u32[3], z.extract32<3>()); break;
case 2: WritePixel16(vm16, za.u32[3], z.extract16<3 * 2>()); break;
}
}
#else
// Generic path: same per-lane logic as above, driven by a loop.
int i = 0;
do
{
if(fm.u32[i] != 0xffffffff)
{
switch(fpsm)
{
case 0: WritePixel32(vm32, fa.u32[i], c.u32[i]); break;
case 1: WritePixel24(vm32, fa.u32[i], c.u32[i]); break;
// u16[i * 2] is the low word of 32-bit lane i.
case 2: WritePixel16(vm16, fa.u32[i], c.u16[i * 2]); break;
}
}
if(zm.u32[i] != 0xffffffff)
{
switch(zpsm)
{
case 0: WritePixel32(vm32, za.u32[i], z.u32[i]); break;
case 1: WritePixel24(vm32, za.u32[i], z.u32[i]); break;
case 2: WritePixel16(vm16, za.u32[i], z.u16[i * 2]); break;
}
}
}
while(++i < pixels);
#endif
}
//
GSRasterizerMT::GSRasterizerMT(GSState* state, int id, int threads, long* sync)
@@ -1221,7 +1548,7 @@ GSRasterizerMT::~GSRasterizerMT()
}
}
void GSRasterizerMT::BeginDraw(Vertex* vertices, int count, DWORD* texture)
void GSRasterizerMT::BeginDraw(Vertex* vertices, int count, const GSTextureCacheSW::GSTexture* texture)
{
m_vertices = vertices;
m_count = count;

View File

@@ -23,6 +23,7 @@
#include "GSState.h"
#include "GSVertexSW.h"
#include "GSTextureCacheSW.h"
#include "GSAlignedClass.h"
class GSRasterizer : public GSAlignedClass<16>
@@ -34,9 +35,6 @@ protected:
int m_id;
int m_threads;
DWORD* m_texture;
DWORD m_tw;
private:
struct ColumnOffset
{
@@ -49,7 +47,11 @@ private:
{
int steps;
GSLocalMemory::readTexture rtx;
void* vm;
const void* tex;
const DWORD* pal;
DWORD tw;
GSVector4i* fbr;
GSVector4i* zbr;
@@ -64,7 +66,7 @@ private:
GSVector4i aref;
GSVector4 afix;
GSVector4 afix2;
struct {GSVector4 r, g, b;} f;
GSVector4 fc;
GSVector4 dp, dp4;
GSVector4 dt, dt4;
@@ -94,6 +96,7 @@ private:
DWORD pabe:1; // 28
DWORD rfb:1; // 29
DWORD wzb:1; // 30
DWORD tlu:1; // 31
};
struct
@@ -105,10 +108,10 @@ private:
DWORD dw;
operator DWORD() {return dw & 0x7fffffff;}
operator DWORD() {return dw;}// & 0x7fffffff;}
};
CRect m_scissor;
GSVector4i m_scissor;
CRBMapC<DWORD, ColumnOffset*> m_comap;
ColumnOffset* m_fbco;
ColumnOffset* m_zbco;
@@ -134,17 +137,23 @@ private:
template<DWORD sel>
void DrawScanlineEx(int top, int left, int right, const Vertex& v);
__forceinline void SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, const GSVector4& u, const GSVector4& v, GSVector4* c);
__forceinline void SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, DWORD pal, const GSVector4& u, const GSVector4& v, GSVector4* c);
__forceinline void ColorTFX(DWORD tfx, const GSVector4& rf, const GSVector4& gf, const GSVector4& bf, const GSVector4& af, GSVector4& rt, GSVector4& gt, GSVector4& bt);
__forceinline void AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4& af, GSVector4& at);
__forceinline void Fog(const GSVector4& f, GSVector4& r, GSVector4& g, GSVector4& b);
__forceinline bool TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& za, GSVector4i& test);
__forceinline bool TestAlpha(DWORD atst, DWORD afail, const GSVector4& a, GSVector4i& fm, GSVector4i& zm, GSVector4i& test);
// Returns the DWORD texel at column x, row y of m_texture.
// The row offset is (y << m_tw), i.e. consecutive rows are (1 << m_tw) entries apart.
// No bounds checking is performed on x or y here.
__forceinline DWORD ReadTexel(int x, int y)
{
return m_texture[(y << m_tw) + x];
}
__forceinline static DWORD ReadPixel32(DWORD* RESTRICT vm, DWORD addr);
__forceinline static DWORD ReadPixel24(DWORD* RESTRICT vm, DWORD addr);
__forceinline static DWORD ReadPixel16(WORD* RESTRICT vm, DWORD addr);
__forceinline static void WritePixel32(DWORD* RESTRICT vm, DWORD addr, DWORD c);
__forceinline static void WritePixel24(DWORD* RESTRICT vm, DWORD addr, DWORD c);
__forceinline static void WritePixel16(WORD* RESTRICT vm, DWORD addr, DWORD c);
__forceinline GSVector4i ReadFrameX(int psm, const GSVector4i& addr) const;
__forceinline GSVector4i ReadZBufX(int psm, const GSVector4i& addr) const;
__forceinline void WriteFrameAndZBufX(int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f, int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z, int pixels);
__forceinline GSVector4i Wrap(const GSVector4i& t)
{
@@ -158,7 +167,7 @@ private:
void DrawLine(Vertex* v);
void DrawTriangle(Vertex* v);
void DrawSprite(Vertex* v);
bool DrawSolidRect(int left, int top, int right, int bottom, const Vertex& v);
bool DrawSolidRect(const GSVector4i& r, const Vertex& v);
__forceinline void DrawTriangleSection(Vertex& l, const Vertex& dl, GSVector4& r, const GSVector4& dr, const GSVector4& b, const Vertex& dscan);
@@ -166,13 +175,14 @@ public:
GSRasterizer(GSState* state, int id = 0, int threads = 0);
virtual ~GSRasterizer();
int Draw(Vertex* v, int count, DWORD* texture);
int Draw(Vertex* v, int count, const GSTextureCacheSW::GSTexture* texture);
};
class GSRasterizerMT : public GSRasterizer
{
Vertex* m_vertices;
int m_count;
const GSTextureCacheSW::GSTexture* m_texture;
long* m_sync;
bool m_exit;
DWORD m_ThreadId;
@@ -186,5 +196,5 @@ public:
GSRasterizerMT(GSState* state, int id, int threads, long* sync);
virtual ~GSRasterizerMT();
void BeginDraw(Vertex* vertices, int count, DWORD* texture);
void BeginDraw(Vertex* vertices, int count, const GSTextureCacheSW::GSTexture* texture);
};

File diff suppressed because it is too large Load Diff

View File

@@ -444,7 +444,7 @@ void GSRendererHW10::Draw(int prim, Texture& rt, Texture& ds, GSTextureCache<Dev
int w = rt.GetWidth();
int h = rt.GetHeight();
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.hw) & CRect(0, 0, w, h);
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.in) & CRect(0, 0, w, h);
//

View File

@@ -414,7 +414,7 @@ void GSRendererHW9::Draw(int prim, Texture& rt, Texture& ds, GSTextureCache<Devi
int w = rt.GetWidth();
int h = rt.GetHeight();
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.hw) & CRect(0, 0, w, h);
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.in) & CRect(0, 0, w, h);
//

View File

@@ -155,7 +155,7 @@ protected:
__forceinline int ScissorTest(const GSVector4& p0, const GSVector4& p1)
{
GSVector4 scissor = m_context->scissor.sw;
GSVector4 scissor = m_context->scissor.ex;
GSVector4 v0 = p0 < scissor;
GSVector4 v1 = p1 > scissor.zwxy();
@@ -226,20 +226,24 @@ protected:
{
// TODO: lot to optimize here
DWORD* texture = NULL;
GSDrawingContext* context = m_context;
const GSTextureCacheSW::GSTexture* texture = NULL;
if(PRIM->TME)
{
int w = 1 << m_context->TEX0.TW;
int h = 1 << m_context->TEX0.TH;
m_mem.m_clut.Read32(context->TEX0, m_env.TEXA);
int wms = m_context->CLAMP.WMS;
int wmt = m_context->CLAMP.WMT;
int w = 1 << context->TEX0.TW;
int h = 1 << context->TEX0.TH;
int minu = (int)m_context->CLAMP.MINU;
int minv = (int)m_context->CLAMP.MINV;
int maxu = (int)m_context->CLAMP.MAXU;
int maxv = (int)m_context->CLAMP.MAXV;
int wms = context->CLAMP.WMS;
int wmt = context->CLAMP.WMT;
int minu = (int)context->CLAMP.MINU;
int minv = (int)context->CLAMP.MINV;
int maxu = (int)context->CLAMP.MAXU;
int maxv = (int)context->CLAMP.MAXV;
CRect r = CRect(0, 0, w, h);
@@ -281,7 +285,7 @@ protected:
r &= CRect(0, 0, w, h);
texture = m_tc->Lookup(m_context->TEX0, m_env.TEXA, &r);
texture = m_tc->Lookup(context->TEX0, m_env.TEXA, &r);
if(!texture) {ASSERT(0); return;}
}
@@ -323,10 +327,10 @@ protected:
{
CRect r;
r.left = max(m_context->SCISSOR.SCAX0, 0);
r.top = max(m_context->SCISSOR.SCAY0, 0);
r.right = min(m_context->SCISSOR.SCAX1 + 1, m_context->FRAME.FBW * 64);
r.bottom = min(m_context->SCISSOR.SCAY1 + 1, 4096);
r.left = max(context->SCISSOR.SCAX0, 0);
r.top = max(context->SCISSOR.SCAY0, 0);
r.right = min(context->SCISSOR.SCAX1 + 1, context->FRAME.FBW * 64);
r.bottom = min(context->SCISSOR.SCAY1 + 1, 4096);
GSVector4 minv(+1e10f);
GSVector4 maxv(-1e10f);
@@ -348,16 +352,16 @@ protected:
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.DBP = m_context->FRAME.Block();
BITBLTBUF.DBW = m_context->FRAME.FBW;
BITBLTBUF.DPSM = m_context->FRAME.PSM;
BITBLTBUF.DBP = context->FRAME.Block();
BITBLTBUF.DBW = context->FRAME.FBW;
BITBLTBUF.DPSM = context->FRAME.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
if(m_context->DepthWrite())
if(context->DepthWrite())
{
BITBLTBUF.DBP = m_context->ZBUF.Block();
BITBLTBUF.DPSM = m_context->ZBUF.PSM;
BITBLTBUF.DBP = context->ZBUF.Block();
BITBLTBUF.DPSM = context->ZBUF.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}

View File

@@ -37,14 +37,11 @@ GSTextureCacheSW::~GSTextureCacheSW()
RemoveAll();
}
DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
{
GSLocalMemory& mem = m_state->m_mem;
mem.m_clut.Read32(TEX0, TEXA);
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const DWORD* clut = mem.m_clut;
const CAtlList<GSTexturePage*>& t2p = m_p2t[TEX0.TBP0 >> 5];
@@ -71,11 +68,6 @@ DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
continue;
}
if(psm.pal > 0 && !GSVector4i::compare(t2->m_clut, clut, psm.pal * sizeof(clut[0])))
{
continue;
}
// fprintf(m_log, "cache hit\n");
t = t2;
@@ -134,7 +126,7 @@ DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
return NULL;
}
return t->m_texture;
return t;
}
void GSTextureCacheSW::RemoveAll()
@@ -239,8 +231,8 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons
GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
: m_state(state)
, m_texture(NULL)
, m_clut(NULL)
, m_buff(NULL)
, m_tw(0)
, m_maxpages(0)
, m_pages(0)
, m_pos(NULL)
@@ -251,14 +243,9 @@ GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
GSTextureCacheSW::GSTexture::~GSTexture()
{
if(m_texture)
if(m_buff)
{
_aligned_free(m_texture);
}
if(m_clut)
{
_aligned_free(m_clut);
_aligned_free(m_buff);
}
POSITION pos = m_p2te.GetHeadPosition();
@@ -294,7 +281,6 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
GSLocalMemory& mem = m_state->m_mem;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const DWORD* clut = mem.m_clut;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
@@ -302,28 +288,18 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
if(tw < psm.bs.cx) tw = psm.bs.cx;
if(th < psm.bs.cy) th = psm.bs.cy;
if(m_texture == NULL)
if(m_buff == NULL)
{
// fprintf(m_log, "up new (%d %d)\n", tw, th);
m_texture = (DWORD*)_aligned_malloc(tw * th * sizeof(DWORD), 16);
m_buff = _aligned_malloc(tw * th * sizeof(DWORD), 16);
if(m_texture == NULL)
if(m_buff == NULL)
{
return false;
}
m_clut = (DWORD*)_aligned_malloc(256 * sizeof(DWORD), 16);
if(m_clut == NULL)
{
return false;
}
if(psm.pal > 0)
{
memcpy(m_clut, clut, psm.pal * sizeof(clut[0]));
}
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
}
CRect r2;
@@ -336,12 +312,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
r2.bottom = (r->bottom + (psm.pgs.cy - 1)) & ~(psm.pgs.cy - 1);
}
DWORD* texture = m_texture;
// TODO
DWORD pitch = 1 << max(3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_texture
GSLocalMemory::readTexture rt = psm.pal > 0 ? psm.rtxP : psm.rtx;
int bytes = psm.pal > 0 ? 1 : 4;
BYTE* dst = (BYTE*)m_buff;
DWORD pitch = (1 << m_tw) * bytes;
DWORD mask = pitch - 1;
for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy, texture += pitch * psm.pgs.cy)
for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy, dst += pitch * psm.pgs.cy)
{
if(m_valid[j] == mask)
{
@@ -385,9 +366,9 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
// fprintf(m_log, "up fetch (%d %d) (%d %d %d %d)\n", j, i, r.left, r.top, r.right, r.bottom);
(mem.*psm.rtx)(r, (BYTE*)&texture[x], pitch * 4, TEX0, TEXA);
(mem.*rt)(r, &dst[x * bytes], pitch, TEX0, TEXA);
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * 4);
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * bytes);
}
}

View File

@@ -51,8 +51,8 @@ public:
GSState* m_state;
GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA;
DWORD* m_texture;
DWORD* m_clut;
void* m_buff;
DWORD m_tw;
DWORD m_valid[32];
DWORD m_maxpages;
DWORD m_pages;
@@ -75,7 +75,7 @@ public:
GSTextureCacheSW(GSState* state);
virtual ~GSTextureCacheSW();
DWORD* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r = NULL);
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r = NULL);
void RemoveAll();
void IncAge();

View File

@@ -1570,8 +1570,6 @@ public:
__declspec(align(16)) class GSVector4
{
static const __m128 m_ps0123;
public:
union
{
@@ -1590,6 +1588,8 @@ public:
__m128 m;
};
static const __m128 m_ps0123;
GSVector4()
{
}

View File

@@ -192,7 +192,7 @@ EXPORT_C_(UINT32) PS2EgetLibVersion2(UINT32 type)
{
// Version components that are packed into the single UINT32 returned below.
// The visible body does not read the `type` argument.
const UINT32 revision = 0;
const UINT32 build = 1;
// NOTE(review): the two `minor` lines below appear to be the before/after sides
// of this diff hunk (9 -> 10), not two declarations in one scope - confirm against the real file.
const UINT32 minor = 9;
const UINT32 minor = 10;
// Packed layout, by starting bit: build at 0, revision at 8, PS2E_GS_VERSION at 16, minor at 24.
return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (minor << 24);
}