mirror of
https://github.com/PCSX2/gsdx-sourceforge.git
synced 2026-02-04 03:11:19 +01:00
This commit is contained in:
201
gsdx/GSBlock.h
201
gsdx/GSBlock.h
@@ -835,6 +835,207 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
// Expands one block of packed 4-bit texels from src into dst, one texel per
// output byte (each output byte holds a single nibble value, 0..15).
//
// src      - block data; 16 GSVector4i values are read from it.
// dst      - output; 16 rows are written, each row by two vector stores at
//            byte offsets 0 and 16.
// dstpitch - distance in bytes between consecutive output rows.
//
// NOTE(review): store<true> presumably selects an aligned 16-byte store, which
// would require dst and dstpitch to be 16-byte aligned - confirm in GSVector4i.
// NOTE(review): the non-SSE build has no implementation; dst is left untouched.
__forceinline static void ReadBlock4P(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* blk = (const GSVector4i*)src;

	// keeps the low nibble of every byte; andnot(mask) keeps the high nibble
	GSVector4i mask(0x0f0f0f0f);

	GSVector4i a, b, c, d;

	// each pass consumes 8 source vectors and produces 8 output rows
	for(int half = 0; half < 2; half++, blk += 8)
	{
		BYTE* row1;

		// col 0, 2

		a = blk[0];
		b = blk[1];
		c = blk[2];
		d = blk[3];

		GSVector4i::sw8(a, b, c, d);
		GSVector4i::sw16(a, b, c, d);
		GSVector4i::sw8(a, c, b, d);

		// low nibbles, in the order produced by the swizzle

		row1 = dst + dstpitch;

		GSVector4i::store<true>(dst, a & mask);
		GSVector4i::store<true>(dst + 16, b & mask);
		GSVector4i::store<true>(row1, c & mask);
		GSVector4i::store<true>(row1 + 16, d & mask);

		dst += dstpitch * 2;

		// high nibbles, lanes reordered with yxwz() and moved down to bits 0..3

		row1 = dst + dstpitch;

		GSVector4i::store<true>(dst, a.andnot(mask).yxwz() >> 4);
		GSVector4i::store<true>(dst + 16, b.andnot(mask).yxwz() >> 4);
		GSVector4i::store<true>(row1, c.andnot(mask).yxwz() >> 4);
		GSVector4i::store<true>(row1 + 16, d.andnot(mask).yxwz() >> 4);

		dst += dstpitch * 2;

		// col 1, 3

		a = blk[4];
		b = blk[5];
		c = blk[6];
		d = blk[7];

		GSVector4i::sw8(a, b, c, d);
		GSVector4i::sw16(a, b, c, d);
		GSVector4i::sw8(a, c, b, d);

		// here it is the low nibbles that need the yxwz() lane reorder

		row1 = dst + dstpitch;

		GSVector4i::store<true>(dst, (a & mask).yxwz());
		GSVector4i::store<true>(dst + 16, (b & mask).yxwz());
		GSVector4i::store<true>(row1, (c & mask).yxwz());
		GSVector4i::store<true>(row1 + 16, (d & mask).yxwz());

		dst += dstpitch * 2;

		// high nibbles, moved down to bits 0..3 without a lane reorder

		row1 = dst + dstpitch;

		GSVector4i::store<true>(dst, a.andnot(mask) >> 4);
		GSVector4i::store<true>(dst + 16, b.andnot(mask) >> 4);
		GSVector4i::store<true>(row1, c.andnot(mask) >> 4);
		GSVector4i::store<true>(row1 + 16, d.andnot(mask) >> 4);

		dst += dstpitch * 2;
	}

	#else

	// TODO

	#endif
}
|
||||
|
||||
// Extracts the top 8 bits (bits 24..31) of every 32-bit texel of a block and
// writes them as one byte per texel: 8 rows of 8 bytes, dstpitch bytes apart.
//
// src      - block of 32-bit texels (16 GSVector4i values are read in the SSE path).
// dst      - output byte buffer.
// dstpitch - distance in bytes between consecutive output rows.
__forceinline static void ReadBlock8HP(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* quad = (const GSVector4i*)src;

	// each pass turns 4 source vectors into 2 output rows
	for(int pass = 0; pass < 4; pass++, quad += 4)
	{
		GSVector4i a = quad[0];
		GSVector4i b = quad[1];
		GSVector4i c = quad[2];
		GSVector4i d = quad[3];

		GSVector4i::sw64(a, b, c, d);

		// shift the top byte of each dword down, then pack 32 -> 16 -> 8 bits
		GSVector4i packed = ((a >> 24).ps32(b >> 24)).pu16((c >> 24).ps32(d >> 24));

		GSVector4i::storel(dst, packed);
		GSVector4i::storeh(dst + dstpitch, packed);

		dst += dstpitch * 2;
	}

	#else

	// columnTable32 supplies, for each output position, the index of the source dword
	const DWORD* order = &columnTable32[0][0];
	const DWORD* texels = (const DWORD*)src;

	for(int row = 0; row < 8; row++, order += 8, dst += dstpitch)
	{
		for(int col = 0; col < 8; col++)
		{
			dst[col] = texels[order[col]] >> 24;
		}
	}

	#endif
}
|
||||
|
||||
// Extracts bits 24..27 (the low nibble of the top byte) of every 32-bit texel
// of a block and writes them as one byte per texel: 8 rows of 8 bytes,
// dstpitch bytes apart.
//
// src      - block of 32-bit texels (16 GSVector4i values are read in the SSE path).
// dst      - output byte buffer.
// dstpitch - distance in bytes between consecutive output rows.
__forceinline static void ReadBlock4HLP(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* quad = (const GSVector4i*)src;

	// keeps only the low nibble of each packed byte
	GSVector4i mask(0x0f0f0f0f);

	// each pass turns 4 source vectors into 2 output rows
	for(int pass = 0; pass < 4; pass++, quad += 4)
	{
		GSVector4i a = quad[0];
		GSVector4i b = quad[1];
		GSVector4i c = quad[2];
		GSVector4i d = quad[3];

		GSVector4i::sw64(a, b, c, d);

		// shift the top byte of each dword down, pack 32 -> 16 -> 8 bits, drop the high nibble
		GSVector4i packed = ((a >> 24).ps32(b >> 24)).pu16((c >> 24).ps32(d >> 24)) & mask;

		GSVector4i::storel(dst, packed);
		GSVector4i::storeh(dst + dstpitch, packed);

		dst += dstpitch * 2;
	}

	#else

	// columnTable32 supplies, for each output position, the index of the source dword
	const DWORD* order = &columnTable32[0][0];
	const DWORD* texels = (const DWORD*)src;

	for(int row = 0; row < 8; row++, order += 8, dst += dstpitch)
	{
		for(int col = 0; col < 8; col++)
		{
			dst[col] = (texels[order[col]] >> 24) & 0xf;
		}
	}

	#endif
}
|
||||
|
||||
// Extracts bits 28..31 (the top nibble) of every 32-bit texel of a block and
// writes them as one byte per texel: 8 rows of 8 bytes, dstpitch bytes apart.
//
// src      - block of 32-bit texels (16 GSVector4i values are read in the SSE path).
// dst      - output byte buffer.
// dstpitch - distance in bytes between consecutive output rows.
__forceinline static void ReadBlock4HHP(const BYTE* RESTRICT src, BYTE* RESTRICT dst, int dstpitch)
{
	#if _M_SSE >= 0x200

	const GSVector4i* quad = (const GSVector4i*)src;

	// each pass turns 4 source vectors into 2 output rows
	for(int pass = 0; pass < 4; pass++, quad += 4)
	{
		GSVector4i a = quad[0];
		GSVector4i b = quad[1];
		GSVector4i c = quad[2];
		GSVector4i d = quad[3];

		GSVector4i::sw64(a, b, c, d);

		// shift the top nibble of each dword down, then pack 32 -> 16 -> 8 bits
		GSVector4i packed = ((a >> 28).ps32(b >> 28)).pu16((c >> 28).ps32(d >> 28));

		GSVector4i::storel(dst, packed);
		GSVector4i::storeh(dst + dstpitch, packed);

		dst += dstpitch * 2;
	}

	#else

	// columnTable32 supplies, for each output position, the index of the source dword
	const DWORD* order = &columnTable32[0][0];
	const DWORD* texels = (const DWORD*)src;

	for(int row = 0; row < 8; row++, order += 8, dst += dstpitch)
	{
		for(int col = 0; col < 8; col++)
		{
			dst[col] = texels[order[col]] >> 28;
		}
	}

	#endif
}
|
||||
|
||||
static void UnpackBlock24(const BYTE* RESTRICT src, int srcpitch, DWORD* RESTRICT dst)
|
||||
{
|
||||
#if _M_SSE >= 0x200
|
||||
|
||||
@@ -47,8 +47,8 @@ public:
|
||||
{
|
||||
GSVector4i dx10;
|
||||
GSVector4 dx9;
|
||||
GSVector4 hw;
|
||||
GSVector4 sw;
|
||||
GSVector4 in;
|
||||
GSVector4 ex;
|
||||
} scissor;
|
||||
|
||||
GSDrawingContext()
|
||||
@@ -83,13 +83,13 @@ public:
|
||||
|
||||
scissor.dx9 = GSVector4(scissor.dx10);
|
||||
|
||||
scissor.hw = GSVector4(
|
||||
scissor.in = GSVector4(
|
||||
(int)SCISSOR.SCAX0,
|
||||
(int)SCISSOR.SCAY0,
|
||||
(int)SCISSOR.SCAX1 + 1,
|
||||
(int)SCISSOR.SCAY1 + 1);
|
||||
|
||||
scissor.sw = GSVector4i(
|
||||
scissor.ex = GSVector4i(
|
||||
(int)SCISSOR.SCAX0,
|
||||
(int)SCISSOR.SCAY0,
|
||||
(int)SCISSOR.SCAX1,
|
||||
|
||||
@@ -166,6 +166,7 @@ GSLocalMemory::GSLocalMemory()
|
||||
m_psm[i].ri = &GSLocalMemory::ReadImageX; // TODO
|
||||
m_psm[i].rtx = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[i].rtxNP = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[i].rtxP = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[i].bpp = m_psm[i].trbpp = 32;
|
||||
m_psm[i].pal = 0;
|
||||
m_psm[i].bs = CSize(8, 8);
|
||||
@@ -341,6 +342,12 @@ GSLocalMemory::GSLocalMemory()
|
||||
m_psm[PSM_PSMZ16].rtxNP = &GSLocalMemory::ReadTexture16ZNP;
|
||||
m_psm[PSM_PSMZ16S].rtxNP = &GSLocalMemory::ReadTexture16SZNP;
|
||||
|
||||
m_psm[PSM_PSMT8].rtxP = &GSLocalMemory::ReadTexture8P;
|
||||
m_psm[PSM_PSMT4].rtxP = &GSLocalMemory::ReadTexture4P;
|
||||
m_psm[PSM_PSMT8H].rtxP = &GSLocalMemory::ReadTexture8HP;
|
||||
m_psm[PSM_PSMT4HL].rtxP = &GSLocalMemory::ReadTexture4HLP;
|
||||
m_psm[PSM_PSMT4HH].rtxP = &GSLocalMemory::ReadTexture4HHP;
|
||||
|
||||
m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256;
|
||||
m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16;
|
||||
|
||||
@@ -381,7 +388,7 @@ GSLocalMemory::~GSLocalMemory()
|
||||
VirtualFree(m_vm8, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD bw)
|
||||
bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp, DWORD bw)
|
||||
{
|
||||
const psm_t& tbl = m_psm[psm];
|
||||
|
||||
@@ -404,31 +411,39 @@ bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD
|
||||
|
||||
CRect clip;
|
||||
|
||||
clip.left = (r.left + (w-1)) & ~(w-1);
|
||||
clip.top = (r.top + (h-1)) & ~(h-1);
|
||||
clip.right = r.right & ~(w-1);
|
||||
clip.bottom = r.bottom & ~(h-1);
|
||||
clip.left = (r.x + (w - 1)) & ~(w - 1);
|
||||
clip.top = (r.y + (h - 1)) & ~(h - 1);
|
||||
clip.right = r.z & ~(w - 1);
|
||||
clip.bottom = r.w & ~(h - 1);
|
||||
|
||||
for(int y = r.top; y < clip.top; y++)
|
||||
for(int y = r.y; y < clip.top; y++)
|
||||
{
|
||||
for(int x = r.left; x < r.right; x++)
|
||||
for(int x = r.x; x < r.z; x++)
|
||||
{
|
||||
(this->*wp)(x, y, c, bp, bw);
|
||||
}
|
||||
}
|
||||
|
||||
if(r.left < clip.left || clip.right < r.right)
|
||||
for(int y = clip.bottom; y < r.w; y++)
|
||||
{
|
||||
for(int x = r.x; x < r.z; x++)
|
||||
{
|
||||
(this->*wp)(x, y, c, bp, bw);
|
||||
}
|
||||
}
|
||||
|
||||
if(r.x < clip.left || clip.right < r.z)
|
||||
{
|
||||
for(int y = clip.top; y < clip.bottom; y += h)
|
||||
{
|
||||
for(int ys = y, ye = y + h; ys < ye; ys++)
|
||||
{
|
||||
for(int x = r.left; x < clip.left; x++)
|
||||
for(int x = r.x; x < clip.left; x++)
|
||||
{
|
||||
(this->*wp)(x, ys, c, bp, bw);
|
||||
}
|
||||
|
||||
for(int x = clip.right; x < r.right; x++)
|
||||
for(int x = clip.right; x < r.z; x++)
|
||||
{
|
||||
(this->*wp)(x, ys, c, bp, bw);
|
||||
}
|
||||
@@ -524,14 +539,6 @@ bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD
|
||||
#endif
|
||||
}
|
||||
|
||||
for(int y = clip.bottom; y < r.bottom; y++)
|
||||
{
|
||||
for(int x = r.left; x < r.right; x++)
|
||||
{
|
||||
(this->*wp)(x, y, c, bp, bw);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1876,6 +1883,53 @@ void GSLocalMemory::ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, con
|
||||
}
|
||||
}
|
||||
|
||||
// 32/8
|
||||
|
||||
void GSLocalMemory::ReadTexture8P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
FOREACH_BLOCK_START(16, 16, 8)
|
||||
{
|
||||
ReadBlock8<true>(&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture4P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
FOREACH_BLOCK_START(32, 16, 8)
|
||||
{
|
||||
ReadBlock4P(&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture8HP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
FOREACH_BLOCK_START(8, 8, 8)
|
||||
{
|
||||
ReadBlock8HP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
FOREACH_BLOCK_START(8, 8, 8)
|
||||
{
|
||||
ReadBlock4HLP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
FOREACH_BLOCK_START(8, 8, 8)
|
||||
{
|
||||
ReadBlock4HHP((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
template<typename T>
|
||||
|
||||
@@ -59,7 +59,7 @@ public:
|
||||
writeFrameAddr wfa;
|
||||
writeImage wi;
|
||||
readImage ri;
|
||||
readTexture rtx, rtxNP;
|
||||
readTexture rtx, rtxNP, rtxP;
|
||||
DWORD bpp, pal, trbpp;
|
||||
CSize bs, pgs;
|
||||
int* rowOffset[8];
|
||||
@@ -547,21 +547,6 @@ public:
|
||||
WritePixel16(addr, (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3));
|
||||
}
|
||||
|
||||
__forceinline void WritePixel32(DWORD* dst, DWORD addr, DWORD c)
|
||||
{
|
||||
dst[addr] = c;
|
||||
}
|
||||
|
||||
__forceinline void WritePixel24(DWORD* dst, DWORD addr, DWORD c)
|
||||
{
|
||||
dst[addr] = (dst[addr] & 0xff000000) | (c & 0x00ffffff);
|
||||
}
|
||||
|
||||
__forceinline void WritePixel16(WORD* dst, DWORD addr, DWORD c)
|
||||
{
|
||||
dst[addr] = (WORD)c;
|
||||
}
|
||||
|
||||
__forceinline void WritePixel32(int x, int y, DWORD c, DWORD bp, DWORD bw)
|
||||
{
|
||||
WritePixel32(PixelAddress32(x, y, bp, bw), c);
|
||||
@@ -911,244 +896,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline GSVector4i ReadFrameX(int psm, const GSVector4i& addr) const
|
||||
{
|
||||
GSVector4i c, r, g, b, a;
|
||||
|
||||
switch(psm)
|
||||
{
|
||||
case 0:
|
||||
#if _M_SSE >= 0x401
|
||||
c = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
c = GSVector4i(
|
||||
(int)ReadPixel32(addr.u32[0]),
|
||||
(int)ReadPixel32(addr.u32[1]),
|
||||
(int)ReadPixel32(addr.u32[2]),
|
||||
(int)ReadPixel32(addr.u32[3]));
|
||||
#endif
|
||||
break;
|
||||
case 1:
|
||||
#if _M_SSE >= 0x401
|
||||
c = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
c = GSVector4i(
|
||||
(int)ReadPixel32(addr.u32[0]),
|
||||
(int)ReadPixel32(addr.u32[1]),
|
||||
(int)ReadPixel32(addr.u32[2]),
|
||||
(int)ReadPixel32(addr.u32[3]));
|
||||
#endif
|
||||
c = (c & GSVector4i::x00ffffff(addr)) | GSVector4i::x80000000(addr);
|
||||
break;
|
||||
case 2:
|
||||
#if _M_SSE >= 0x401
|
||||
c = addr.gather32_32(m_vm16);
|
||||
#else
|
||||
c = GSVector4i(
|
||||
(int)ReadPixel16(addr.u32[0]),
|
||||
(int)ReadPixel16(addr.u32[1]),
|
||||
(int)ReadPixel16(addr.u32[2]),
|
||||
(int)ReadPixel16(addr.u32[3]));
|
||||
#endif
|
||||
c = ((c & 0x8000) << 16) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
c = GSVector4i::zero();
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
__forceinline GSVector4i ReadZBufX(int psm, const GSVector4i& addr) const
|
||||
{
|
||||
GSVector4i z;
|
||||
|
||||
switch(psm)
|
||||
{
|
||||
case 0:
|
||||
#if _M_SSE >= 0x401
|
||||
z = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
z = GSVector4i(
|
||||
(int)ReadPixel32(addr.u32[0]),
|
||||
(int)ReadPixel32(addr.u32[1]),
|
||||
(int)ReadPixel32(addr.u32[2]),
|
||||
(int)ReadPixel32(addr.u32[3]));
|
||||
#endif
|
||||
break;
|
||||
case 1:
|
||||
#if _M_SSE >= 0x401
|
||||
z = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
z = GSVector4i(
|
||||
(int)ReadPixel32(addr.u32[0]),
|
||||
(int)ReadPixel32(addr.u32[1]),
|
||||
(int)ReadPixel32(addr.u32[2]),
|
||||
(int)ReadPixel32(addr.u32[3]));
|
||||
#endif
|
||||
z = z & GSVector4i::x00ffffff(addr);
|
||||
break;
|
||||
case 2:
|
||||
#if _M_SSE >= 0x401
|
||||
z = addr.gather32_32(m_vm16);
|
||||
#else
|
||||
z = GSVector4i(
|
||||
(int)ReadPixel16(addr.u32[0]),
|
||||
(int)ReadPixel16(addr.u32[1]),
|
||||
(int)ReadPixel16(addr.u32[2]),
|
||||
(int)ReadPixel16(addr.u32[3]));
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
z = GSVector4i::zero();
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
__forceinline void WriteFrameAndZBufX(
|
||||
int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f,
|
||||
int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z,
|
||||
int pixels)
|
||||
{
|
||||
// FIXME: compiler problem or not enough xmm regs in x86 mode to store the address regs (fa, za)
|
||||
|
||||
DWORD* RESTRICT vm32 = m_vm32;
|
||||
WORD* RESTRICT vm16 = m_vm16;
|
||||
|
||||
GSVector4i c = f;
|
||||
|
||||
if(fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = c & 0x00f800f8;
|
||||
GSVector4i ga = c & 0x8000f800;
|
||||
c = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3);
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
if(fm.extract32<0>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[0], c.extract32<0>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[0], c.extract32<0>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[0], c.extract16<0 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<0>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[0], z.extract32<0>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[0], z.extract32<0>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[0], z.extract16<0 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels <= 1) return;
|
||||
|
||||
if(fm.extract32<1>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[1], c.extract32<1>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[1], c.extract32<1>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[1], c.extract16<1 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<1>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[1], z.extract32<1>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[1], z.extract32<1>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[1], z.extract16<1 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels <= 2) return;
|
||||
|
||||
if(fm.extract32<2>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[2], c.extract32<2>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[2], c.extract32<2>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[2], c.extract16<2 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<2>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[2], z.extract32<2>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[2], z.extract32<2>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[2], z.extract16<2 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels <= 3) return;
|
||||
|
||||
if(fm.extract32<3>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[3], c.extract32<3>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[3], c.extract32<3>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[3], c.extract16<3 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<3>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[3], z.extract32<3>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[3], z.extract32<3>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[3], z.extract16<3 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
if(fm.u32[i] != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[i], c.u32[i]); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[i], c.u32[i]); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[i], c.u16[i * 2]); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.u32[i] != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[i], z.u32[i]); break;
|
||||
case 1: WritePixel24(vm32, za.u32[i], z.u32[i]); break;
|
||||
case 2: WritePixel16(vm16, za.u32[i], z.u16[i * 2]); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
while(++i < pixels);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// FillRect
|
||||
|
||||
bool FillRect(const CRect& r, DWORD c, DWORD psm, DWORD bp, DWORD bw);
|
||||
bool FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp, DWORD bw);
|
||||
|
||||
//
|
||||
|
||||
@@ -1212,6 +962,14 @@ public:
|
||||
void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
|
||||
void ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
|
||||
|
||||
// 32/8
|
||||
|
||||
void ReadTexture8P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTexture4P(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTexture8HP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
|
||||
|
||||
//
|
||||
|
||||
static DWORD m_xtbl[1024], m_ytbl[1024];
|
||||
|
||||
@@ -86,7 +86,7 @@ GSRasterizer::~GSRasterizer()
|
||||
m_comap.RemoveAll();
|
||||
}
|
||||
|
||||
int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
int GSRasterizer::Draw(Vertex* vertices, int count, const GSTextureCacheSW::GSTexture* texture)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_state->m_env;
|
||||
GSDrawingContext* context = m_state->m_context;
|
||||
@@ -101,15 +101,21 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
|
||||
// m_scissor
|
||||
|
||||
m_scissor.left = max(context->SCISSOR.SCAX0, 0);
|
||||
m_scissor.top = max(context->SCISSOR.SCAY0, 0);
|
||||
m_scissor.right = min(context->SCISSOR.SCAX1 + 1, context->FRAME.FBW * 64);
|
||||
m_scissor.bottom = min(context->SCISSOR.SCAY1 + 1, 4096);
|
||||
m_scissor = context->scissor.in;
|
||||
|
||||
// TODO: find a game that overflows and check which one is the right behaviour
|
||||
|
||||
m_scissor.z = min(m_scissor.z, context->FRAME.FBW * 64);
|
||||
|
||||
// m_sel
|
||||
|
||||
m_sel.dw = 0;
|
||||
|
||||
if(PRIM->AA1)
|
||||
{
|
||||
// TODO: automatic alpha blending (ABE=1, A=0 B=1 C=0 D=1)
|
||||
}
|
||||
|
||||
m_sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM);
|
||||
m_sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);
|
||||
m_sel.ztst = context->TEST.ZTE && context->TEST.ZTST > 1 ? context->TEST.ZTST : context->ZBUF.ZMSK ? 0 : 1;
|
||||
@@ -159,6 +165,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
m_sel.pabe = PRIM->ABE ? env.PABE.PABE : 0;
|
||||
m_sel.rfb = m_sel.date || m_sel.abe != 255 || m_sel.atst != 1 && m_sel.afail == 3 || context->FRAME.FBMSK != 0 && context->FRAME.FBMSK != 0xffffffff;
|
||||
m_sel.wzb = context->DepthWrite();
|
||||
m_sel.tlu = PRIM->TME && GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0 ? 1 : 0;
|
||||
|
||||
m_dsf = m_ds[m_sel.fpsm][m_sel.zpsm][m_sel.ztst][m_sel.iip];
|
||||
|
||||
@@ -212,7 +219,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
SetupColumnOffset();
|
||||
|
||||
m_slenv.steps = 0;
|
||||
m_slenv.rtx = GSLocalMemory::m_psm[context->TEX0.PSM].rtx;
|
||||
m_slenv.vm = m_state->m_mem.m_vm32;
|
||||
m_slenv.fbr = m_fbco->row;
|
||||
m_slenv.zbr = m_zbco->row;
|
||||
m_slenv.fbc = m_fbco->col;
|
||||
@@ -225,9 +232,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
m_slenv.aref = GSVector4i((int)context->TEST.AREF + (m_sel.atst == ATST_LESS ? -1 : m_sel.atst == ATST_GREATER ? +1 : 0));
|
||||
m_slenv.afix = GSVector4((float)(int)context->ALPHA.FIX);
|
||||
m_slenv.afix2 = m_slenv.afix * (2.0f / 256);
|
||||
m_slenv.f.r = GSVector4((float)(int)env.FOGCOL.FCR);
|
||||
m_slenv.f.g = GSVector4((float)(int)env.FOGCOL.FCG);
|
||||
m_slenv.f.b = GSVector4((float)(int)env.FOGCOL.FCB);
|
||||
m_slenv.fc = GSVector4((DWORD)env.FOGCOL.ai32[0]);
|
||||
|
||||
if(m_sel.fpsm == 1)
|
||||
{
|
||||
@@ -236,8 +241,9 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
m_texture = texture;
|
||||
m_tw = max(3, context->TEX0.TW);
|
||||
m_slenv.tex = texture->m_buff;
|
||||
m_slenv.pal = m_state->m_mem.m_clut;
|
||||
m_slenv.tw = texture->m_tw;
|
||||
|
||||
short tw = (short)(1 << context->TEX0.TW);
|
||||
short th = (short)(1 << context->TEX0.TH);
|
||||
@@ -297,8 +303,6 @@ int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
|
||||
m_slenv.t.min = m_slenv.t.min.xxxxl().xxxxh();
|
||||
m_slenv.t.max = m_slenv.t.max.xxxxl().xxxxh();
|
||||
m_slenv.t.mask = m_slenv.t.mask.xxzz();
|
||||
|
||||
// m_tw = (int)max(context->TEX0.TW, TEXTURE_CACHE_WIDTH);
|
||||
}
|
||||
|
||||
//
|
||||
@@ -355,7 +359,7 @@ void GSRasterizer::DrawPoint(Vertex* v)
|
||||
|
||||
GSVector4i p(v->p);
|
||||
|
||||
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||
if(m_scissor.x <= p.x && p.x < m_scissor.z && m_scissor.y <= p.y && p.y < m_scissor.w)
|
||||
{
|
||||
if((p.y % m_threads) == m_id)
|
||||
{
|
||||
@@ -481,8 +485,8 @@ void GSRasterizer::DrawTriangleSection(Vertex& l, const Vertex& dl, GSVector4& r
|
||||
int top = tb.z;
|
||||
int bottom = tb.w;
|
||||
|
||||
if(top < m_scissor.top) top = m_scissor.top;
|
||||
if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
|
||||
if(top < m_scissor.y) top = m_scissor.y;
|
||||
if(bottom > m_scissor.w) bottom = m_scissor.w;
|
||||
|
||||
if(top < bottom)
|
||||
{
|
||||
@@ -520,8 +524,8 @@ if(scanmsk >= 0)
|
||||
int left = lr.x;
|
||||
int right = lr.y;
|
||||
|
||||
if(left < m_scissor.left) left = m_scissor.left;
|
||||
if(right > m_scissor.right) right = m_scissor.right;
|
||||
if(left < m_scissor.x) left = m_scissor.x;
|
||||
if(right > m_scissor.z) right = m_scissor.z;
|
||||
|
||||
if(right > left)
|
||||
{
|
||||
@@ -554,25 +558,35 @@ void GSRasterizer::DrawSprite(Vertex* vertices)
|
||||
v[1].p = vertices[1].p.blend8(vertices[0].p, mask);
|
||||
v[1].t = vertices[1].t.blend8(vertices[0].t, mask);
|
||||
|
||||
GSVector4i tlbr(v[0].p.xyxy(v[1].p).ceil());
|
||||
GSVector4i r(v[0].p.xyxy(v[1].p).ceil());
|
||||
|
||||
int top = tlbr.y;
|
||||
int bottom = tlbr.w;
|
||||
int& top = r.y;
|
||||
int& bottom = r.w;
|
||||
|
||||
if(top < m_scissor.top) top = m_scissor.top;
|
||||
if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
|
||||
int& left = r.x;
|
||||
int& right = r.z;
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
r = r.sat_i32(m_scissor);
|
||||
|
||||
if((r < r.zwzw()).mask() != 0x00ff) return;
|
||||
|
||||
#else
|
||||
|
||||
if(top < m_scissor.y) top = m_scissor.y;
|
||||
if(bottom > m_scissor.w) bottom = m_scissor.w;
|
||||
if(top >= bottom) return;
|
||||
|
||||
int left = tlbr.x;
|
||||
int right = tlbr.z;
|
||||
|
||||
if(left < m_scissor.left) left = m_scissor.left;
|
||||
if(right > m_scissor.right) right = m_scissor.right;
|
||||
if(left < m_scissor.x) left = m_scissor.x;
|
||||
if(right > m_scissor.z) right = m_scissor.z;
|
||||
if(left >= right) return;
|
||||
|
||||
#endif
|
||||
|
||||
Vertex scan = v[0];
|
||||
|
||||
if(DrawSolidRect(left, top, right, bottom, scan))
|
||||
if(DrawSolidRect(r, scan))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -618,9 +632,9 @@ void GSRasterizer::DrawSprite(Vertex* vertices)
|
||||
}
|
||||
}
|
||||
|
||||
bool GSRasterizer::DrawSolidRect(int left, int top, int right, int bottom, const Vertex& v)
|
||||
bool GSRasterizer::DrawSolidRect(const GSVector4i& r, const Vertex& v)
|
||||
{
|
||||
if(left >= right || top >= bottom || !m_solidrect)
|
||||
if(r.x >= r.z || r.y >= r.w || !m_solidrect)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@@ -630,10 +644,8 @@ bool GSRasterizer::DrawSolidRect(int left, int top, int right, int bottom, const
|
||||
return true;
|
||||
}
|
||||
|
||||
ASSERT(top >= 0);
|
||||
ASSERT(bottom >= 0);
|
||||
|
||||
CRect r(left, top, right, bottom);
|
||||
ASSERT(r.y >= 0);
|
||||
ASSERT(r.w >= 0);
|
||||
|
||||
GSDrawingContext* context = m_state->m_context;
|
||||
|
||||
@@ -827,9 +839,6 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
|
||||
continue;
|
||||
}
|
||||
|
||||
// DWORD mask = (DWORD)(((int)steps - 4) >> 31);
|
||||
// int pixels = (steps & mask) | (4 & ~mask);
|
||||
|
||||
int pixels = GSVector4i::store(GSVector4i::load(steps).min_i16(GSVector4i::load(4)));
|
||||
|
||||
GSVector4 c[12];
|
||||
@@ -853,7 +862,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
|
||||
}
|
||||
}
|
||||
|
||||
SampleTexture(ztst, test, pixels, m_sel.ltf, u, v, c);
|
||||
SampleTexture(ztst, test, pixels, m_sel.ltf, m_sel.tlu, u, v, c);
|
||||
}
|
||||
|
||||
AlphaTFX(m_sel.tfx, m_sel.tcc, a, c[3]);
|
||||
@@ -879,7 +888,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
|
||||
|
||||
if(m_sel.rfb)
|
||||
{
|
||||
d = m_state->m_mem.ReadFrameX(fpsm == 1 ? 0 : fpsm, fa);
|
||||
d = ReadFrameX(fpsm == 1 ? 0 : fpsm, fa);
|
||||
|
||||
if(fpsm != 1 && m_sel.date)
|
||||
{
|
||||
@@ -957,7 +966,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
|
||||
s = s.blend(d, fm);
|
||||
}
|
||||
|
||||
m_state->m_mem.WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels);
|
||||
WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels);
|
||||
|
||||
}
|
||||
while(0);
|
||||
@@ -989,8 +998,12 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
|
||||
}
|
||||
}
|
||||
|
||||
void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, const GSVector4& u, const GSVector4& v, GSVector4* c)
|
||||
void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, DWORD tlu, const GSVector4& u, const GSVector4& v, GSVector4* c)
|
||||
{
|
||||
const void* RESTRICT tex = m_slenv.tex;
|
||||
const DWORD* RESTRICT pal = m_slenv.pal;
|
||||
const DWORD tw = m_slenv.tw;
|
||||
|
||||
if(ltf)
|
||||
{
|
||||
GSVector4 uf = u.floor();
|
||||
@@ -1006,26 +1019,51 @@ void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels,
|
||||
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
if(tlu)
|
||||
{
|
||||
if(ztst > 1 && test.u32[i])
|
||||
do
|
||||
{
|
||||
continue;
|
||||
if(ztst > 1 && test.u32[i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
GSVector4 c00 = GSVector4(pal[((const BYTE*)tex)[(uv0.u16[i + 4] << tw) + uv0.u16[i]]]);
|
||||
GSVector4 c01 = GSVector4(pal[((const BYTE*)tex)[(uv0.u16[i + 4] << tw) + uv1.u16[i]]]);
|
||||
GSVector4 c10 = GSVector4(pal[((const BYTE*)tex)[(uv1.u16[i + 4] << tw) + uv0.u16[i]]]);
|
||||
GSVector4 c11 = GSVector4(pal[((const BYTE*)tex)[(uv1.u16[i + 4] << tw) + uv1.u16[i]]]);
|
||||
|
||||
c00 = c00.lerp(c01, uff.v[i]);
|
||||
c10 = c10.lerp(c11, uff.v[i]);
|
||||
c00 = c00.lerp(c10, vff.v[i]);
|
||||
|
||||
c[i] = c00;
|
||||
|
||||
}
|
||||
|
||||
GSVector4 c00(ReadTexel(uv0.u16[i], uv0.u16[i + 4]));
|
||||
GSVector4 c01(ReadTexel(uv1.u16[i], uv0.u16[i + 4]));
|
||||
GSVector4 c10(ReadTexel(uv0.u16[i], uv1.u16[i + 4]));
|
||||
GSVector4 c11(ReadTexel(uv1.u16[i], uv1.u16[i + 4]));
|
||||
|
||||
c00 = c00.lerp(c01, uff.v[i]);
|
||||
c10 = c10.lerp(c11, uff.v[i]);
|
||||
c00 = c00.lerp(c10, vff.v[i]);
|
||||
|
||||
c[i] = c00;
|
||||
|
||||
while(++i < pixels);
|
||||
}
|
||||
else
|
||||
{
|
||||
do
|
||||
{
|
||||
if(ztst > 1 && test.u32[i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
GSVector4 c00 = GSVector4(((const DWORD*)tex)[(uv0.u16[i + 4] << tw) + uv0.u16[i]]);
|
||||
GSVector4 c01 = GSVector4(((const DWORD*)tex)[(uv0.u16[i + 4] << tw) + uv1.u16[i]]);
|
||||
GSVector4 c10 = GSVector4(((const DWORD*)tex)[(uv1.u16[i + 4] << tw) + uv0.u16[i]]);
|
||||
GSVector4 c11 = GSVector4(((const DWORD*)tex)[(uv1.u16[i + 4] << tw) + uv1.u16[i]]);
|
||||
|
||||
c00 = c00.lerp(c01, uff.v[i]);
|
||||
c10 = c10.lerp(c11, uff.v[i]);
|
||||
c00 = c00.lerp(c10, vff.v[i]);
|
||||
|
||||
c[i] = c00;
|
||||
}
|
||||
while(++i < pixels);
|
||||
}
|
||||
while(++i < pixels);
|
||||
|
||||
GSVector4::transpose(c[0], c[1], c[2], c[3]);
|
||||
}
|
||||
@@ -1037,16 +1075,32 @@ void GSRasterizer::SampleTexture(DWORD ztst, const GSVector4i& test, int pixels,
|
||||
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
if(tlu)
|
||||
{
|
||||
if(ztst > 1 && test.u32[i])
|
||||
do
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if(ztst > 1 && test.u32[i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
c00.u32[i] = ReadTexel(uv.u16[i], uv.u16[i + 4]);
|
||||
c00.u32[i] = pal[((const BYTE*)tex)[(uv.u16[i + 4] << tw) + uv.u16[i]]];
|
||||
}
|
||||
while(++i < pixels);
|
||||
}
|
||||
else
|
||||
{
|
||||
do
|
||||
{
|
||||
if(ztst > 1 && test.u32[i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
c00.u32[i] = ((const DWORD*)tex)[(uv.u16[i + 4] << tw) + uv.u16[i]];
|
||||
}
|
||||
while(++i < pixels);
|
||||
}
|
||||
while(++i < pixels);
|
||||
|
||||
// GSVector4::expand(c00, c[0], c[1], c[2], c[3]);
|
||||
|
||||
@@ -1105,16 +1159,18 @@ void GSRasterizer::AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4& af, GSVector4
|
||||
|
||||
void GSRasterizer::Fog(const GSVector4& f, GSVector4& r, GSVector4& g, GSVector4& b)
|
||||
{
|
||||
r = m_slenv.f.r.lerp(r, f);
|
||||
g = m_slenv.f.g.lerp(g, f);
|
||||
b = m_slenv.f.b.lerp(b, f);
|
||||
GSVector4 fc = m_slenv.fc;
|
||||
|
||||
r = fc.xxxx().lerp(r, f);
|
||||
g = fc.yyyy().lerp(g, f);
|
||||
b = fc.zzzz().lerp(b, f);
|
||||
}
|
||||
|
||||
bool GSRasterizer::TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& za, GSVector4i& test)
|
||||
{
|
||||
if(ztst > 1)
|
||||
{
|
||||
GSVector4i zd = m_state->m_mem.ReadZBufX(zpsm, za);
|
||||
GSVector4i zd = ReadZBufX(zpsm, za);
|
||||
|
||||
GSVector4i zso = zs;
|
||||
GSVector4i zdo = zd;
|
||||
@@ -1192,6 +1248,277 @@ bool GSRasterizer::TestAlpha(DWORD atst, DWORD afail, const GSVector4& a, GSVect
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the 32-bit value stored at element index `addr` of `vm`.
// `addr` is an index in DWORD units, not a byte offset.
DWORD GSRasterizer::ReadPixel32(DWORD* RESTRICT vm, DWORD addr)
{
	return *(vm + addr);
}
|
||||
|
||||
// Returns the low 24 bits of the 32-bit value stored at element index `addr`
// of `vm`; the top byte is cleared in the result.
DWORD GSRasterizer::ReadPixel24(DWORD* RESTRICT vm, DWORD addr)
{
	const DWORD raw = vm[addr];

	return raw & 0x00ffffff;
}
|
||||
|
||||
// Returns the WORD stored at element index `addr` of `vm`, widened to DWORD.
// `addr` is an index in WORD units, not a byte offset.
DWORD GSRasterizer::ReadPixel16(WORD* RESTRICT vm, DWORD addr)
{
	const WORD raw = vm[addr];

	return static_cast<DWORD>(raw);
}
|
||||
|
||||
// Stores the full 32-bit value `c` at element index `addr` of `vm`.
void GSRasterizer::WritePixel32(DWORD* RESTRICT vm, DWORD addr, DWORD c)
{
	*(vm + addr) = c;
}
|
||||
|
||||
// Stores the low 24 bits of `c` at element index `addr` of `vm` while keeping
// the top byte that is already in memory (read-modify-write).
void GSRasterizer::WritePixel24(DWORD* RESTRICT vm, DWORD addr, DWORD c)
{
	const DWORD kept = vm[addr] & 0xff000000;
	const DWORD incoming = c & 0x00ffffff;

	vm[addr] = kept | incoming;
}
|
||||
|
||||
// Stores `c`, converted to WORD, at element index `addr` of `vm`.
// Any bits of `c` that do not fit into a WORD are discarded by the conversion.
void GSRasterizer::WritePixel16(WORD* RESTRICT vm, DWORD addr, DWORD c)
{
	const WORD narrowed = static_cast<WORD>(c);

	vm[addr] = narrowed;
}
|
||||
|
||||
// Reads four frame buffer pixels from m_slenv.vm, one per 32-bit lane of
// `addr`, and returns them as 32-bit colors.
//
// psm selects how the memory is interpreted:
//   0 - 32-bit pixels, returned unchanged
//   1 - 32-bit fetch; the result is combined with the GSVector4i::x00ffffff and
//       GSVector4i::x80000000 helpers (presumably: keep the low 24 bits and
//       force the top bit - confirm against GSVector4i)
//   2 - 16-bit pixels, expanded from 1:5:5:5 to 8:8:8:8 bit positions
// Any other value asserts and yields a zero vector.
//
// Each lane of `addr` is an element index (DWORD units for psm 0/1, WORD units
// for psm 2), not a byte offset. All four lanes are always read.
GSVector4i GSRasterizer::ReadFrameX(int psm, const GSVector4i& addr) const
{
	DWORD* RESTRICT vm32 = (DWORD*)m_slenv.vm;
	WORD* RESTRICT vm16 = (WORD*)m_slenv.vm;

	GSVector4i c;

	switch(psm)
	{
	case 0:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(vm32);
		#else
		c = GSVector4i(
			ReadPixel32(vm32, addr.u32[0]),
			ReadPixel32(vm32, addr.u32[1]),
			ReadPixel32(vm32, addr.u32[2]),
			ReadPixel32(vm32, addr.u32[3]));
		#endif
		break;
	case 1:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(vm32);
		#else
		c = GSVector4i(
			ReadPixel32(vm32, addr.u32[0]),
			ReadPixel32(vm32, addr.u32[1]),
			ReadPixel32(vm32, addr.u32[2]),
			ReadPixel32(vm32, addr.u32[3]));
		#endif
		c = (c & GSVector4i::x00ffffff(addr)) | GSVector4i::x80000000(addr);
		break;
	case 2:
		#if _M_SSE >= 0x401
		c = addr.gather32_32(vm16);
		#else
		c = GSVector4i(
			ReadPixel16(vm16, addr.u32[0]),
			ReadPixel16(vm16, addr.u32[1]),
			ReadPixel16(vm16, addr.u32[2]),
			ReadPixel16(vm16, addr.u32[3]));
		#endif
		// bit 15 -> bit 31, bits 10-14 -> 19-23, bits 5-9 -> 11-15, bits 0-4 -> 3-7
		c = ((c & 0x8000) << 16) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
		break;
	default:
		ASSERT(0);
		c = GSVector4i::zero();
		break;
	}

	return c;
}
|
||||
|
||||
// Reads four depth values from m_slenv.vm, one per 32-bit lane of `addr`.
//
// psm selects how the memory is interpreted:
//   0 - 32-bit values, returned unchanged
//   1 - 32-bit fetch, then ANDed with GSVector4i::x00ffffff(addr)
//   2 - 16-bit values, returned as fetched
// Any other value asserts and yields a zero vector.
//
// Each lane of `addr` is an element index (DWORD units for psm 0/1, WORD units
// for psm 2), not a byte offset. All four lanes are always read.
GSVector4i GSRasterizer::ReadZBufX(int psm, const GSVector4i& addr) const
{
	GSVector4i depth;

	if(psm == 0 || psm == 1)
	{
		DWORD* RESTRICT mem32 = (DWORD*)m_slenv.vm;

		// both formats fetch whole dwords; psm 1 applies the mask afterwards
		#if _M_SSE >= 0x401
		depth = addr.gather32_32(mem32);
		#else
		depth = GSVector4i(
			ReadPixel32(mem32, addr.u32[0]),
			ReadPixel32(mem32, addr.u32[1]),
			ReadPixel32(mem32, addr.u32[2]),
			ReadPixel32(mem32, addr.u32[3]));
		#endif

		if(psm == 1)
		{
			depth = depth & GSVector4i::x00ffffff(addr);
		}
	}
	else if(psm == 2)
	{
		WORD* RESTRICT mem16 = (WORD*)m_slenv.vm;

		#if _M_SSE >= 0x401
		depth = addr.gather32_32(mem16);
		#else
		depth = GSVector4i(
			ReadPixel16(mem16, addr.u32[0]),
			ReadPixel16(mem16, addr.u32[1]),
			ReadPixel16(mem16, addr.u32[2]),
			ReadPixel16(mem16, addr.u32[3]));
		#endif
	}
	else
	{
		ASSERT(0);

		depth = GSVector4i::zero();
	}

	return depth;
}
|
||||
|
||||
void GSRasterizer::WriteFrameAndZBufX(
|
||||
int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f,
|
||||
int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z,
|
||||
int pixels)
|
||||
{
|
||||
// FIXME: compiler problem or not enough xmm regs in x86 mode to store the address regs (fa, za)
|
||||
|
||||
DWORD* RESTRICT vm32 = (DWORD*)m_slenv.vm;
|
||||
WORD* RESTRICT vm16 = (WORD*)m_slenv.vm;
|
||||
|
||||
GSVector4i c = f;
|
||||
|
||||
if(fpsm == 2)
|
||||
{
|
||||
GSVector4i rb = c & 0x00f800f8;
|
||||
GSVector4i ga = c & 0x8000f800;
|
||||
c = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3);
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
if(fm.extract32<0>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[0], c.extract32<0>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[0], c.extract32<0>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[0], c.extract16<0 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<0>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[0], z.extract32<0>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[0], z.extract32<0>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[0], z.extract16<0 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels <= 1) return;
|
||||
|
||||
if(fm.extract32<1>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[1], c.extract32<1>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[1], c.extract32<1>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[1], c.extract16<1 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<1>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[1], z.extract32<1>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[1], z.extract32<1>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[1], z.extract16<1 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels <= 2) return;
|
||||
|
||||
if(fm.extract32<2>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[2], c.extract32<2>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[2], c.extract32<2>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[2], c.extract16<2 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<2>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[2], z.extract32<2>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[2], z.extract32<2>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[2], z.extract16<2 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(pixels <= 3) return;
|
||||
|
||||
if(fm.extract32<3>() != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[3], c.extract32<3>()); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[3], c.extract32<3>()); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[3], c.extract16<3 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.extract32<3>() != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[3], z.extract32<3>()); break;
|
||||
case 1: WritePixel24(vm32, za.u32[3], z.extract32<3>()); break;
|
||||
case 2: WritePixel16(vm16, za.u32[3], z.extract16<3 * 2>()); break;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
if(fm.u32[i] != 0xffffffff)
|
||||
{
|
||||
switch(fpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, fa.u32[i], c.u32[i]); break;
|
||||
case 1: WritePixel24(vm32, fa.u32[i], c.u32[i]); break;
|
||||
case 2: WritePixel16(vm16, fa.u32[i], c.u16[i * 2]); break;
|
||||
}
|
||||
}
|
||||
|
||||
if(zm.u32[i] != 0xffffffff)
|
||||
{
|
||||
switch(zpsm)
|
||||
{
|
||||
case 0: WritePixel32(vm32, za.u32[i], z.u32[i]); break;
|
||||
case 1: WritePixel24(vm32, za.u32[i], z.u32[i]); break;
|
||||
case 2: WritePixel16(vm16, za.u32[i], z.u16[i * 2]); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
while(++i < pixels);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSRasterizerMT::GSRasterizerMT(GSState* state, int id, int threads, long* sync)
|
||||
@@ -1221,7 +1548,7 @@ GSRasterizerMT::~GSRasterizerMT()
|
||||
}
|
||||
}
|
||||
|
||||
void GSRasterizerMT::BeginDraw(Vertex* vertices, int count, DWORD* texture)
|
||||
void GSRasterizerMT::BeginDraw(Vertex* vertices, int count, const GSTextureCacheSW::GSTexture* texture)
|
||||
{
|
||||
m_vertices = vertices;
|
||||
m_count = count;
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
#include "GSState.h"
|
||||
#include "GSVertexSW.h"
|
||||
#include "GSTextureCacheSW.h"
|
||||
#include "GSAlignedClass.h"
|
||||
|
||||
class GSRasterizer : public GSAlignedClass<16>
|
||||
@@ -34,9 +35,6 @@ protected:
|
||||
int m_id;
|
||||
int m_threads;
|
||||
|
||||
DWORD* m_texture;
|
||||
DWORD m_tw;
|
||||
|
||||
private:
|
||||
struct ColumnOffset
|
||||
{
|
||||
@@ -49,7 +47,11 @@ private:
|
||||
{
|
||||
int steps;
|
||||
|
||||
GSLocalMemory::readTexture rtx;
|
||||
void* vm;
|
||||
|
||||
const void* tex;
|
||||
const DWORD* pal;
|
||||
DWORD tw;
|
||||
|
||||
GSVector4i* fbr;
|
||||
GSVector4i* zbr;
|
||||
@@ -64,7 +66,7 @@ private:
|
||||
GSVector4i aref;
|
||||
GSVector4 afix;
|
||||
GSVector4 afix2;
|
||||
struct {GSVector4 r, g, b;} f;
|
||||
GSVector4 fc;
|
||||
|
||||
GSVector4 dp, dp4;
|
||||
GSVector4 dt, dt4;
|
||||
@@ -94,6 +96,7 @@ private:
|
||||
DWORD pabe:1; // 28
|
||||
DWORD rfb:1; // 29
|
||||
DWORD wzb:1; // 30
|
||||
DWORD tlu:1; // 31
|
||||
};
|
||||
|
||||
struct
|
||||
@@ -105,10 +108,10 @@ private:
|
||||
|
||||
DWORD dw;
|
||||
|
||||
operator DWORD() {return dw & 0x7fffffff;}
|
||||
operator DWORD() {return dw;}// & 0x7fffffff;}
|
||||
};
|
||||
|
||||
CRect m_scissor;
|
||||
GSVector4i m_scissor;
|
||||
CRBMapC<DWORD, ColumnOffset*> m_comap;
|
||||
ColumnOffset* m_fbco;
|
||||
ColumnOffset* m_zbco;
|
||||
@@ -134,17 +137,23 @@ private:
|
||||
template<DWORD sel>
|
||||
void DrawScanlineEx(int top, int left, int right, const Vertex& v);
|
||||
|
||||
__forceinline void SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, const GSVector4& u, const GSVector4& v, GSVector4* c);
|
||||
__forceinline void SampleTexture(DWORD ztst, const GSVector4i& test, int pixels, DWORD ltf, DWORD pal, const GSVector4& u, const GSVector4& v, GSVector4* c);
|
||||
__forceinline void ColorTFX(DWORD tfx, const GSVector4& rf, const GSVector4& gf, const GSVector4& bf, const GSVector4& af, GSVector4& rt, GSVector4& gt, GSVector4& bt);
|
||||
__forceinline void AlphaTFX(DWORD tfx, DWORD tcc, const GSVector4& af, GSVector4& at);
|
||||
__forceinline void Fog(const GSVector4& f, GSVector4& r, GSVector4& g, GSVector4& b);
|
||||
__forceinline bool TestZ(DWORD zpsm, DWORD ztst, const GSVector4i& zs, const GSVector4i& za, GSVector4i& test);
|
||||
__forceinline bool TestAlpha(DWORD atst, DWORD afail, const GSVector4& a, GSVector4i& fm, GSVector4i& zm, GSVector4i& test);
|
||||
|
||||
__forceinline DWORD ReadTexel(int x, int y)
|
||||
{
|
||||
return m_texture[(y << m_tw) + x];
|
||||
}
|
||||
__forceinline static DWORD ReadPixel32(DWORD* RESTRICT vm, DWORD addr);
|
||||
__forceinline static DWORD ReadPixel24(DWORD* RESTRICT vm, DWORD addr);
|
||||
__forceinline static DWORD ReadPixel16(WORD* RESTRICT vm, DWORD addr);
|
||||
__forceinline static void WritePixel32(DWORD* RESTRICT vm, DWORD addr, DWORD c);
|
||||
__forceinline static void WritePixel24(DWORD* RESTRICT vm, DWORD addr, DWORD c);
|
||||
__forceinline static void WritePixel16(WORD* RESTRICT vm, DWORD addr, DWORD c);
|
||||
|
||||
__forceinline GSVector4i ReadFrameX(int psm, const GSVector4i& addr) const;
|
||||
__forceinline GSVector4i ReadZBufX(int psm, const GSVector4i& addr) const;
|
||||
__forceinline void WriteFrameAndZBufX(int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f, int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z, int pixels);
|
||||
|
||||
__forceinline GSVector4i Wrap(const GSVector4i& t)
|
||||
{
|
||||
@@ -158,7 +167,7 @@ private:
|
||||
void DrawLine(Vertex* v);
|
||||
void DrawTriangle(Vertex* v);
|
||||
void DrawSprite(Vertex* v);
|
||||
bool DrawSolidRect(int left, int top, int right, int bottom, const Vertex& v);
|
||||
bool DrawSolidRect(const GSVector4i& r, const Vertex& v);
|
||||
|
||||
__forceinline void DrawTriangleSection(Vertex& l, const Vertex& dl, GSVector4& r, const GSVector4& dr, const GSVector4& b, const Vertex& dscan);
|
||||
|
||||
@@ -166,13 +175,14 @@ public:
|
||||
GSRasterizer(GSState* state, int id = 0, int threads = 0);
|
||||
virtual ~GSRasterizer();
|
||||
|
||||
int Draw(Vertex* v, int count, DWORD* texture);
|
||||
int Draw(Vertex* v, int count, const GSTextureCacheSW::GSTexture* texture);
|
||||
};
|
||||
|
||||
class GSRasterizerMT : public GSRasterizer
|
||||
{
|
||||
Vertex* m_vertices;
|
||||
int m_count;
|
||||
const GSTextureCacheSW::GSTexture* m_texture;
|
||||
long* m_sync;
|
||||
bool m_exit;
|
||||
DWORD m_ThreadId;
|
||||
@@ -186,5 +196,5 @@ public:
|
||||
GSRasterizerMT(GSState* state, int id, int threads, long* sync);
|
||||
virtual ~GSRasterizerMT();
|
||||
|
||||
void BeginDraw(Vertex* vertices, int count, DWORD* texture);
|
||||
void BeginDraw(Vertex* vertices, int count, const GSTextureCacheSW::GSTexture* texture);
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
@@ -444,7 +444,7 @@ void GSRendererHW10::Draw(int prim, Texture& rt, Texture& ds, GSTextureCache<Dev
|
||||
int w = rt.GetWidth();
|
||||
int h = rt.GetHeight();
|
||||
|
||||
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.hw) & CRect(0, 0, w, h);
|
||||
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.in) & CRect(0, 0, w, h);
|
||||
|
||||
//
|
||||
|
||||
|
||||
@@ -414,7 +414,7 @@ void GSRendererHW9::Draw(int prim, Texture& rt, Texture& ds, GSTextureCache<Devi
|
||||
int w = rt.GetWidth();
|
||||
int h = rt.GetHeight();
|
||||
|
||||
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.hw) & CRect(0, 0, w, h);
|
||||
CRect scissor = (CRect)GSVector4i(GSVector4(rt.m_scale).xyxy() * context->scissor.in) & CRect(0, 0, w, h);
|
||||
|
||||
//
|
||||
|
||||
|
||||
@@ -155,7 +155,7 @@ protected:
|
||||
|
||||
__forceinline int ScissorTest(const GSVector4& p0, const GSVector4& p1)
|
||||
{
|
||||
GSVector4 scissor = m_context->scissor.sw;
|
||||
GSVector4 scissor = m_context->scissor.ex;
|
||||
|
||||
GSVector4 v0 = p0 < scissor;
|
||||
GSVector4 v1 = p1 > scissor.zwxy();
|
||||
@@ -226,20 +226,24 @@ protected:
|
||||
{
|
||||
// TODO: lot to optimize here
|
||||
|
||||
DWORD* texture = NULL;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
const GSTextureCacheSW::GSTexture* texture = NULL;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
int w = 1 << m_context->TEX0.TW;
|
||||
int h = 1 << m_context->TEX0.TH;
|
||||
m_mem.m_clut.Read32(context->TEX0, m_env.TEXA);
|
||||
|
||||
int wms = m_context->CLAMP.WMS;
|
||||
int wmt = m_context->CLAMP.WMT;
|
||||
int w = 1 << context->TEX0.TW;
|
||||
int h = 1 << context->TEX0.TH;
|
||||
|
||||
int minu = (int)m_context->CLAMP.MINU;
|
||||
int minv = (int)m_context->CLAMP.MINV;
|
||||
int maxu = (int)m_context->CLAMP.MAXU;
|
||||
int maxv = (int)m_context->CLAMP.MAXV;
|
||||
int wms = context->CLAMP.WMS;
|
||||
int wmt = context->CLAMP.WMT;
|
||||
|
||||
int minu = (int)context->CLAMP.MINU;
|
||||
int minv = (int)context->CLAMP.MINV;
|
||||
int maxu = (int)context->CLAMP.MAXU;
|
||||
int maxv = (int)context->CLAMP.MAXV;
|
||||
|
||||
CRect r = CRect(0, 0, w, h);
|
||||
|
||||
@@ -281,7 +285,7 @@ protected:
|
||||
|
||||
r &= CRect(0, 0, w, h);
|
||||
|
||||
texture = m_tc->Lookup(m_context->TEX0, m_env.TEXA, &r);
|
||||
texture = m_tc->Lookup(context->TEX0, m_env.TEXA, &r);
|
||||
|
||||
if(!texture) {ASSERT(0); return;}
|
||||
}
|
||||
@@ -323,10 +327,10 @@ protected:
|
||||
{
|
||||
CRect r;
|
||||
|
||||
r.left = max(m_context->SCISSOR.SCAX0, 0);
|
||||
r.top = max(m_context->SCISSOR.SCAY0, 0);
|
||||
r.right = min(m_context->SCISSOR.SCAX1 + 1, m_context->FRAME.FBW * 64);
|
||||
r.bottom = min(m_context->SCISSOR.SCAY1 + 1, 4096);
|
||||
r.left = max(context->SCISSOR.SCAX0, 0);
|
||||
r.top = max(context->SCISSOR.SCAY0, 0);
|
||||
r.right = min(context->SCISSOR.SCAX1 + 1, context->FRAME.FBW * 64);
|
||||
r.bottom = min(context->SCISSOR.SCAY1 + 1, 4096);
|
||||
|
||||
GSVector4 minv(+1e10f);
|
||||
GSVector4 maxv(-1e10f);
|
||||
@@ -348,16 +352,16 @@ protected:
|
||||
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.DBP = m_context->FRAME.Block();
|
||||
BITBLTBUF.DBW = m_context->FRAME.FBW;
|
||||
BITBLTBUF.DPSM = m_context->FRAME.PSM;
|
||||
BITBLTBUF.DBP = context->FRAME.Block();
|
||||
BITBLTBUF.DBW = context->FRAME.FBW;
|
||||
BITBLTBUF.DPSM = context->FRAME.PSM;
|
||||
|
||||
m_tc->InvalidateVideoMem(BITBLTBUF, r);
|
||||
|
||||
if(m_context->DepthWrite())
|
||||
if(context->DepthWrite())
|
||||
{
|
||||
BITBLTBUF.DBP = m_context->ZBUF.Block();
|
||||
BITBLTBUF.DPSM = m_context->ZBUF.PSM;
|
||||
BITBLTBUF.DBP = context->ZBUF.Block();
|
||||
BITBLTBUF.DPSM = context->ZBUF.PSM;
|
||||
|
||||
m_tc->InvalidateVideoMem(BITBLTBUF, r);
|
||||
}
|
||||
|
||||
@@ -37,14 +37,11 @@ GSTextureCacheSW::~GSTextureCacheSW()
|
||||
RemoveAll();
|
||||
}
|
||||
|
||||
DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
|
||||
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
|
||||
{
|
||||
GSLocalMemory& mem = m_state->m_mem;
|
||||
|
||||
mem.m_clut.Read32(TEX0, TEXA);
|
||||
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
const DWORD* clut = mem.m_clut;
|
||||
|
||||
const CAtlList<GSTexturePage*>& t2p = m_p2t[TEX0.TBP0 >> 5];
|
||||
|
||||
@@ -71,11 +68,6 @@ DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
|
||||
continue;
|
||||
}
|
||||
|
||||
if(psm.pal > 0 && !GSVector4i::compare(t2->m_clut, clut, psm.pal * sizeof(clut[0])))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// fprintf(m_log, "cache hit\n");
|
||||
|
||||
t = t2;
|
||||
@@ -134,7 +126,7 @@ DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return t->m_texture;
|
||||
return t;
|
||||
}
|
||||
|
||||
void GSTextureCacheSW::RemoveAll()
|
||||
@@ -239,8 +231,8 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons
|
||||
|
||||
GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
|
||||
: m_state(state)
|
||||
, m_texture(NULL)
|
||||
, m_clut(NULL)
|
||||
, m_buff(NULL)
|
||||
, m_tw(0)
|
||||
, m_maxpages(0)
|
||||
, m_pages(0)
|
||||
, m_pos(NULL)
|
||||
@@ -251,14 +243,9 @@ GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
|
||||
|
||||
GSTextureCacheSW::GSTexture::~GSTexture()
|
||||
{
|
||||
if(m_texture)
|
||||
if(m_buff)
|
||||
{
|
||||
_aligned_free(m_texture);
|
||||
}
|
||||
|
||||
if(m_clut)
|
||||
{
|
||||
_aligned_free(m_clut);
|
||||
_aligned_free(m_buff);
|
||||
}
|
||||
|
||||
POSITION pos = m_p2te.GetHeadPosition();
|
||||
@@ -294,7 +281,6 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
||||
GSLocalMemory& mem = m_state->m_mem;
|
||||
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
const DWORD* clut = mem.m_clut;
|
||||
|
||||
int tw = 1 << TEX0.TW;
|
||||
int th = 1 << TEX0.TH;
|
||||
@@ -302,28 +288,18 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
||||
if(tw < psm.bs.cx) tw = psm.bs.cx;
|
||||
if(th < psm.bs.cy) th = psm.bs.cy;
|
||||
|
||||
if(m_texture == NULL)
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
// fprintf(m_log, "up new (%d %d)\n", tw, th);
|
||||
|
||||
m_texture = (DWORD*)_aligned_malloc(tw * th * sizeof(DWORD), 16);
|
||||
m_buff = _aligned_malloc(tw * th * sizeof(DWORD), 16);
|
||||
|
||||
if(m_texture == NULL)
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_clut = (DWORD*)_aligned_malloc(256 * sizeof(DWORD), 16);
|
||||
|
||||
if(m_clut == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(psm.pal > 0)
|
||||
{
|
||||
memcpy(m_clut, clut, psm.pal * sizeof(clut[0]));
|
||||
}
|
||||
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
|
||||
}
|
||||
|
||||
CRect r2;
|
||||
@@ -336,12 +312,17 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
||||
r2.bottom = (r->bottom + (psm.pgs.cy - 1)) & ~(psm.pgs.cy - 1);
|
||||
}
|
||||
|
||||
DWORD* texture = m_texture;
|
||||
// TODO
|
||||
|
||||
DWORD pitch = 1 << max(3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_texture
|
||||
GSLocalMemory::readTexture rt = psm.pal > 0 ? psm.rtxP : psm.rtx;
|
||||
int bytes = psm.pal > 0 ? 1 : 4;
|
||||
|
||||
BYTE* dst = (BYTE*)m_buff;
|
||||
|
||||
DWORD pitch = (1 << m_tw) * bytes;
|
||||
DWORD mask = pitch - 1;
|
||||
|
||||
for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy, texture += pitch * psm.pgs.cy)
|
||||
for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy, dst += pitch * psm.pgs.cy)
|
||||
{
|
||||
if(m_valid[j] == mask)
|
||||
{
|
||||
@@ -385,9 +366,9 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
||||
|
||||
// fprintf(m_log, "up fetch (%d %d) (%d %d %d %d)\n", j, i, r.left, r.top, r.right, r.bottom);
|
||||
|
||||
(mem.*psm.rtx)(r, (BYTE*)&texture[x], pitch * 4, TEX0, TEXA);
|
||||
(mem.*rt)(r, &dst[x * bytes], pitch, TEX0, TEXA);
|
||||
|
||||
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * 4);
|
||||
m_state->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -51,8 +51,8 @@ public:
|
||||
GSState* m_state;
|
||||
GIFRegTEX0 m_TEX0;
|
||||
GIFRegTEXA m_TEXA;
|
||||
DWORD* m_texture;
|
||||
DWORD* m_clut;
|
||||
void* m_buff;
|
||||
DWORD m_tw;
|
||||
DWORD m_valid[32];
|
||||
DWORD m_maxpages;
|
||||
DWORD m_pages;
|
||||
@@ -75,7 +75,7 @@ public:
|
||||
GSTextureCacheSW(GSState* state);
|
||||
virtual ~GSTextureCacheSW();
|
||||
|
||||
DWORD* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r = NULL);
|
||||
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r = NULL);
|
||||
|
||||
void RemoveAll();
|
||||
void IncAge();
|
||||
|
||||
@@ -1570,8 +1570,6 @@ public:
|
||||
|
||||
__declspec(align(16)) class GSVector4
|
||||
{
|
||||
static const __m128 m_ps0123;
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
@@ -1590,6 +1588,8 @@ public:
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
static const __m128 m_ps0123;
|
||||
|
||||
GSVector4()
|
||||
{
|
||||
}
|
||||
|
||||
@@ -192,7 +192,7 @@ EXPORT_C_(UINT32) PS2EgetLibVersion2(UINT32 type)
|
||||
{
|
||||
const UINT32 revision = 0;
|
||||
const UINT32 build = 1;
|
||||
const UINT32 minor = 9;
|
||||
const UINT32 minor = 10;
|
||||
|
||||
return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (minor << 24);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user