This commit is contained in:
gabest
2008-08-06 04:49:39 +00:00
parent fe2e833daf
commit af56bd9ab7
11 changed files with 770 additions and 246 deletions

View File

@@ -183,7 +183,7 @@ union name \
UINT64 i64; \
UINT32 ai32[2]; \
void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \
operator GSVector4i() {return GSVector4i::loadl(this);} \
operator GSVector4i() const {return GSVector4i::loadl(this);} \
struct { \
#define REG128(name)\

View File

@@ -75,8 +75,6 @@ GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
GSLocalMemory::GSLocalMemory()
: m_clut(this)
{
// TODO: MEM_WRITE_WATCH
m_vm8 = (BYTE*)VirtualAlloc(NULL, m_vmsize * 2, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
memset(m_vm8, 0, m_vmsize);
@@ -155,6 +153,7 @@ GSLocalMemory::GSLocalMemory()
m_psm[i].pa = &GSLocalMemory::PixelAddress32;
m_psm[i].ba = &GSLocalMemory::BlockAddress32;
m_psm[i].pga = &GSLocalMemory::PageAddress32;
m_psm[i].pgn = &GSLocalMemory::PageNumber32;
m_psm[i].rp = &GSLocalMemory::ReadPixel32;
m_psm[i].rpa = &GSLocalMemory::ReadPixel32;
m_psm[i].wp = &GSLocalMemory::WritePixel32;
@@ -199,6 +198,13 @@ GSLocalMemory::GSLocalMemory()
m_psm[PSM_PSMT8].pga = &GSLocalMemory::PageAddress8;
m_psm[PSM_PSMT4].pga = &GSLocalMemory::PageAddress4;
m_psm[PSM_PSMCT16].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMCT16S].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMZ16].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMZ16S].pgn = &GSLocalMemory::PageNumber16;
m_psm[PSM_PSMT8].pgn = &GSLocalMemory::PageNumber8;
m_psm[PSM_PSMT4].pgn = &GSLocalMemory::PageNumber4;
m_psm[PSM_PSMCT24].rp = &GSLocalMemory::ReadPixel24;
m_psm[PSM_PSMCT16].rp = &GSLocalMemory::ReadPixel16;
m_psm[PSM_PSMCT16S].rp = &GSLocalMemory::ReadPixel16S;
@@ -1401,7 +1407,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBL
///////////////////
void GSLocalMemory::ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(8, 8, 32)
{
@@ -1411,7 +1417,7 @@ void GSLocalMemory::ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRe
}
void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
if(TEXA.AEM)
{
@@ -1431,7 +1437,7 @@ void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRe
}
}
void GSLocalMemory::ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
__declspec(align(16)) WORD block[16 * 8];
@@ -1444,7 +1450,7 @@ void GSLocalMemory::ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRe
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
__declspec(align(16)) WORD block[16 * 8];
@@ -1457,7 +1463,7 @@ void GSLocalMemory::ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFR
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1468,7 +1474,7 @@ void GSLocalMemory::ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFReg
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const UINT64* pal = m_clut;
@@ -1479,7 +1485,7 @@ void GSLocalMemory::ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFReg
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1490,7 +1496,7 @@ void GSLocalMemory::ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRe
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1501,7 +1507,7 @@ void GSLocalMemory::ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFR
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1512,7 +1518,7 @@ void GSLocalMemory::ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFR
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(8, 8, 32)
{
@@ -1521,7 +1527,7 @@ void GSLocalMemory::ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
if(TEXA.AEM)
{
@@ -1541,7 +1547,7 @@ void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
}
}
void GSLocalMemory::ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
__declspec(align(16)) WORD block[16 * 8];
@@ -1554,7 +1560,7 @@ void GSLocalMemory::ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
__declspec(align(16)) WORD block[16 * 8];
@@ -1569,7 +1575,7 @@ void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, GIF
///////////////////
void GSLocalMemory::ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
void GSLocalMemory::ReadTexture(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP)
{
readTexture rtx = m_psm[TEX0.PSM].rtx;
readTexel rt = m_psm[TEX0.PSM].rt;
@@ -1588,7 +1594,7 @@ void GSLocalMemory::ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegT
}
}
void GSLocalMemory::ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
void GSLocalMemory::ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP)
{
readTexture rtx = m_psm[TEX0.PSM].rtx;
readTexel rt = m_psm[TEX0.PSM].rt;
@@ -1607,7 +1613,7 @@ void GSLocalMemory::ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, GIFRe
}
///////////////////
void GSLocalMemory::ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(16, 8, 16)
{
@@ -1616,7 +1622,7 @@ void GSLocalMemory::ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, GIF
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(16, 8, 16)
{
@@ -1625,7 +1631,7 @@ void GSLocalMemory::ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, GI
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1653,7 +1659,7 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
}
}
void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const UINT64* pal = m_clut;
@@ -1681,7 +1687,7 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
}
}
void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1709,7 +1715,7 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIF
}
}
void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1737,7 +1743,7 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GI
}
}
void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
const DWORD* pal = m_clut;
@@ -1765,7 +1771,7 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
}
}
void GSLocalMemory::ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(16, 8, 16)
{
@@ -1774,7 +1780,7 @@ void GSLocalMemory::ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, GI
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
void GSLocalMemory::ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
FOREACH_BLOCK_START(16, 8, 16)
{
@@ -1785,7 +1791,7 @@ void GSLocalMemory::ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, G
///////////////////
void GSLocalMemory::ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
void GSLocalMemory::ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP)
{
readTexture rtx = m_psm[TEX0.PSM].rtxNP;
readTexel rt = m_psm[TEX0.PSM].rtNP;
@@ -1828,7 +1834,7 @@ void GSLocalMemory::ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRe
}
}
void GSLocalMemory::ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
void GSLocalMemory::ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP)
{
readTexture rtx = m_psm[TEX0.PSM].rtxNP;
readTexel rt = m_psm[TEX0.PSM].rtNP;
@@ -1873,7 +1879,7 @@ void GSLocalMemory::ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, GIF
//
template<typename T>
void GSLocalMemory::ReadTexture(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP, readTexel rt, readTexture rtx)
void GSLocalMemory::ReadTexture(CRect r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, readTexel rt, readTexture rtx)
{
// TODO: this is a mess, make it more simple
@@ -2118,7 +2124,7 @@ if(!aligned) printf("unaligned memory pointer passed to ReadTexture\n");
}
template<typename T>
void GSLocalMemory::ReadTextureNC(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, readTexel rt, readTexture rtx)
void GSLocalMemory::ReadTextureNC(CRect r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, readTexel rt, readTexture rtx)
{
CSize bs = m_psm[TEX0.PSM].bs;

View File

@@ -36,20 +36,20 @@ public:
typedef void (GSLocalMemory::*writePixel)(int x, int y, DWORD c, DWORD bp, DWORD bw);
typedef void (GSLocalMemory::*writeFrame)(int x, int y, DWORD c, DWORD bp, DWORD bw);
typedef DWORD (GSLocalMemory::*readPixel)(int x, int y, DWORD bp, DWORD bw) const;
typedef DWORD (GSLocalMemory::*readTexel)(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
typedef DWORD (GSLocalMemory::*readTexel)(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writePixelAddr)(DWORD addr, DWORD c);
typedef void (GSLocalMemory::*writeFrameAddr)(DWORD addr, DWORD c);
typedef DWORD (GSLocalMemory::*readPixelAddr)(DWORD addr) const;
typedef DWORD (GSLocalMemory::*readTexelAddr)(DWORD addr, GIFRegTEXA& TEXA) const;
typedef DWORD (GSLocalMemory::*readTexelAddr)(DWORD addr, const GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
typedef void (GSLocalMemory::*readImage)(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const;
typedef void (GSLocalMemory::*readTexture)(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
typedef void (GSLocalMemory::*readTexture)(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
typedef union
{
struct
{
pixelAddress pa, ba, pga;
pixelAddress pa, ba, pga, pgn;
readPixel rp;
readPixelAddr rpa;
writePixel wp;
@@ -114,26 +114,48 @@ public:
// address
// Index of the page that holds pixel (x, y) in a buffer whose page index advances
// every 64 pixels in x and every 32 pixels in y.
// bp >> 5 is the page the buffer starts on; each row of pages spans bw pages.
static DWORD PageNumber32(int x, int y, DWORD bp, DWORD bw)
{
	const DWORD first = bp >> 5;      // starting page of the buffer
	const DWORD rows = (y >> 5) * bw; // pages covered by the page-rows above y
	const DWORD col = x >> 6;         // page column inside the current row

	return first + rows + col;
}
// Index of the page that holds pixel (x, y) in a buffer whose page index advances
// every 64 pixels in both x and y.
// bp >> 5 is the page the buffer starts on; each row of pages spans bw pages.
static DWORD PageNumber16(int x, int y, DWORD bp, DWORD bw)
{
	DWORD page = bp >> 5;  // starting page of the buffer

	page += (y >> 6) * bw; // skip the page-rows above y
	page += (x >> 6);      // step to the page column

	return page;
}
// Index of the page that holds pixel (x, y) in a buffer whose page index advances
// every 128 pixels in x and every 64 pixels in y.
// bw must be even: a row of pages spans bw / 2 pages here.
static DWORD PageNumber8(int x, int y, DWORD bp, DWORD bw)
{
	ASSERT((bw & 1) == 0);

	const DWORD pagesPerRow = bw >> 1;

	DWORD page = bp >> 5;           // starting page of the buffer
	page += (y >> 6) * pagesPerRow; // skip the page-rows above y
	page += (x >> 7);               // step to the page column

	return page;
}
// Index of the page that holds pixel (x, y) in a buffer whose page index advances
// every 128 pixels in both x and y.
// bw must be even: a row of pages spans bw / 2 pages here.
static DWORD PageNumber4(int x, int y, DWORD bp, DWORD bw)
{
	ASSERT((bw & 1) == 0);

	const DWORD start = bp >> 5;                // starting page of the buffer
	const DWORD rowSkip = (y >> 7) * (bw >> 1); // pages covered by the page-rows above y
	const DWORD column = x >> 7;                // page column inside the current row

	return start + rowSkip + column;
}
// Returns the page index of pixel (x, y) - the same value PageNumber32 computes - shifted left by 11.
// The two return lines are the pre-change (inline arithmetic) and post-change (PageNumber32 call)
// forms of the same statement as shown by this diff view; both evaluate to the same value.
// NOTE(review): << 11 presumably scales the page index to an element address - confirm.
static DWORD PageAddress32(int x, int y, DWORD bp, DWORD bw)
{
return ((bp >> 5) + (y >> 5) * bw + (x >> 6)) << 11;
return PageNumber32(x, y, bp, bw) << 11;
}
// Returns the page index of pixel (x, y) - the same value PageNumber16 computes - shifted left by 12.
// The two return lines are the pre-change (inline arithmetic) and post-change (PageNumber16 call)
// forms of the same statement as shown by this diff view; both evaluate to the same value.
// NOTE(review): << 12 presumably scales the page index to an element address - confirm.
static DWORD PageAddress16(int x, int y, DWORD bp, DWORD bw)
{
return ((bp >> 5) + (y >> 6) * bw + (x >> 6)) << 12;
return PageNumber16(x, y, bp, bw) << 12;
}
// Returns the page index of pixel (x, y) - the same value PageNumber8 computes - shifted left by 13.
// bw is asserted to be even because the arithmetic uses bw >> 1 pages per row.
// The two return lines are the pre-change (inline arithmetic) and post-change (PageNumber8 call)
// forms of the same statement as shown by this diff view; both evaluate to the same value.
// NOTE(review): << 13 presumably scales the page index to an element address - confirm.
static DWORD PageAddress8(int x, int y, DWORD bp, DWORD bw)
{
ASSERT((bw & 1) == 0);
return ((bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7)) << 13;
return PageNumber8(x, y, bp, bw) << 13;
}
// Returns the page index of pixel (x, y) - the same value PageNumber4 computes - shifted left by 14.
// bw is asserted to be even because the arithmetic uses bw >> 1 pages per row.
// The two return lines are the pre-change (inline arithmetic) and post-change (PageNumber4 call)
// forms of the same statement as shown by this diff view; both evaluate to the same value.
// NOTE(review): << 14 presumably scales the page index to an element address - confirm.
static DWORD PageAddress4(int x, int y, DWORD bp, DWORD bw)
{
ASSERT((bw & 1) == 0);
return ((bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7)) << 14;
return PageNumber4(x, y, bp, bw) << 14;
}
static DWORD BlockAddress32(int x, int y, DWORD bp, DWORD bw)
@@ -625,127 +647,127 @@ public:
WriteFrame16(PixelAddress16SZ(x, y, bp, bw), c);
}
// Returns the 32-bit word stored at m_vm32[addr] unchanged.
// TEXA is not read; the parameter keeps the signature compatible with the readTexelAddr typedef.
__forceinline DWORD ReadTexel32(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel32(DWORD addr, const GIFRegTEXA& TEXA) const
{
return m_vm32[addr];
}
// Reads the 32-bit word at m_vm32[addr] and returns it after Expand24To32 has processed it with TEXA.
__forceinline DWORD ReadTexel24(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel24(DWORD addr, const GIFRegTEXA& TEXA) const
{
return Expand24To32(m_vm32[addr], TEXA);
}
// Reads the 16-bit value at m_vm16[addr] and returns it after Expand16To32 has processed it with TEXA.
__forceinline DWORD ReadTexel16(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16(DWORD addr, const GIFRegTEXA& TEXA) const
{
return Expand16To32(m_vm16[addr], TEXA);
}
// Looks up the texel color in m_clut, indexed by the value ReadPixel8 returns for addr.
// TEXA is not read here.
__forceinline DWORD ReadTexel8(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel8(DWORD addr, const GIFRegTEXA& TEXA) const
{
return m_clut[ReadPixel8(addr)];
}
// Looks up the texel color in m_clut, indexed by the value ReadPixel4 returns for addr.
// TEXA is not read here.
__forceinline DWORD ReadTexel4(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel4(DWORD addr, const GIFRegTEXA& TEXA) const
{
return m_clut[ReadPixel4(addr)];
}
// Looks up the texel color in m_clut, indexed by the value ReadPixel8H returns for addr.
// TEXA is not read here.
__forceinline DWORD ReadTexel8H(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel8H(DWORD addr, const GIFRegTEXA& TEXA) const
{
return m_clut[ReadPixel8H(addr)];
}
// Looks up the texel color in m_clut, indexed by the value ReadPixel4HL returns for addr.
// TEXA is not read here.
__forceinline DWORD ReadTexel4HL(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel4HL(DWORD addr, const GIFRegTEXA& TEXA) const
{
return m_clut[ReadPixel4HL(addr)];
}
// Looks up the texel color in m_clut, indexed by the value ReadPixel4HH returns for addr.
// TEXA is not read here.
__forceinline DWORD ReadTexel4HH(DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel4HH(DWORD addr, const GIFRegTEXA& TEXA) const
{
return m_clut[ReadPixel4HH(addr)];
}
// Coordinate overload: resolves (x, y) with PixelAddress32 using TEX0.TBP0 / TEX0.TBW,
// then defers to the address-based ReadTexel32.
__forceinline DWORD ReadTexel32(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel32(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel32(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: resolves (x, y) with PixelAddress32 using TEX0.TBP0 / TEX0.TBW,
// then defers to the address-based ReadTexel24.
__forceinline DWORD ReadTexel24(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel24(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel24(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: resolves (x, y) with PixelAddress16 using TEX0.TBP0 / TEX0.TBW,
// then defers to the address-based ReadTexel16.
__forceinline DWORD ReadTexel16(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel16(PixelAddress16(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Same as the coordinate-based ReadTexel16 except that the address comes from PixelAddress16S.
__forceinline DWORD ReadTexel16S(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16S(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel16(PixelAddress16S(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: resolves (x, y) with PixelAddress8 using TEX0.TBP0 / TEX0.TBW,
// then defers to the address-based ReadTexel8 (a m_clut lookup).
__forceinline DWORD ReadTexel8(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel8(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel8(PixelAddress8(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: resolves (x, y) with PixelAddress4 using TEX0.TBP0 / TEX0.TBW,
// then defers to the address-based ReadTexel4 (a m_clut lookup).
__forceinline DWORD ReadTexel4(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel4(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel4(PixelAddress4(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: the address is computed with PixelAddress32 (not an 8-bit address function),
// then passed to the address-based ReadTexel8H.
__forceinline DWORD ReadTexel8H(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel8H(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel8H(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: the address is computed with PixelAddress32,
// then passed to the address-based ReadTexel4HL.
__forceinline DWORD ReadTexel4HL(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel4HL(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel4HL(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Coordinate overload: the address is computed with PixelAddress32,
// then passed to the address-based ReadTexel4HH.
__forceinline DWORD ReadTexel4HH(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel4HH(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel4HH(PixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Same as the coordinate-based ReadTexel32 except that the address comes from PixelAddress32Z.
__forceinline DWORD ReadTexel32Z(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel32Z(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel32(PixelAddress32Z(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Same as the coordinate-based ReadTexel24 except that the address comes from PixelAddress32Z.
__forceinline DWORD ReadTexel24Z(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel24Z(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel24(PixelAddress32Z(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Same as the coordinate-based ReadTexel16 except that the address comes from PixelAddress16Z.
__forceinline DWORD ReadTexel16Z(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16Z(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel16(PixelAddress16Z(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Same as the coordinate-based ReadTexel16 except that the address comes from PixelAddress16SZ.
__forceinline DWORD ReadTexel16SZ(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16SZ(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadTexel16(PixelAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
}
// Returns the ReadPixel16 result for (x, y) directly: unlike ReadTexel16 there is no
// Expand16To32 step, and TEXA is not read.
__forceinline DWORD ReadTexel16NP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16NP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16(x, y, TEX0.TBP0, TEX0.TBW);
}
// Returns the ReadPixel16S result for (x, y) directly: no Expand16To32 step, and TEXA is not read.
__forceinline DWORD ReadTexel16SNP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16SNP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16S(x, y, TEX0.TBP0, TEX0.TBW);
}
// Returns the ReadPixel16Z result for (x, y) directly: no Expand16To32 step, and TEXA is not read.
__forceinline DWORD ReadTexel16ZNP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16ZNP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16Z(x, y, TEX0.TBP0, TEX0.TBW);
}
// Returns the ReadPixel16SZ result for (x, y) directly: no Expand16To32 step, and TEXA is not read.
__forceinline DWORD ReadTexel16SZNP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexel16SZNP(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
return ReadPixel16SZ(x, y, TEX0.TBP0, TEX0.TBW);
}
@@ -810,7 +832,7 @@ public:
}
}
__forceinline DWORD ReadTexelX(int PSM, DWORD addr, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexelX(int PSM, DWORD addr, const GIFRegTEXA& TEXA) const
{
switch(PSM)
{
@@ -831,7 +853,7 @@ public:
}
}
__forceinline DWORD ReadTexelX(int PSM, int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const
__forceinline DWORD ReadTexelX(int PSM, int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const
{
switch(PSM)
{
@@ -1006,7 +1028,7 @@ public:
}
#if _M_SSE >= 0x401
if(fm.extract32<0>() != 0xffffffff)
{
switch(fpsm)
@@ -1202,44 +1224,44 @@ public:
//
void ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
void ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
void ReadTexture(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
void ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
// 32/16
void ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA) const;
void ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const;
void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
void ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
void ReadTextureNPNC(const CRect& r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP);
//
static DWORD m_xtbl[1024], m_ytbl[1024];
template<typename T> void ReadTexture(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP, readTexel rt, readTexture rtx);
template<typename T> void ReadTextureNC(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, readTexel rt, readTexture rtx);
template<typename T> void ReadTexture(CRect r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, readTexel rt, readTexture rtx);
template<typename T> void ReadTextureNC(CRect r, BYTE* dst, int dstpitch, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, readTexel rt, readTexture rtx);
//

View File

@@ -36,11 +36,6 @@ GSRasterizer::GSRasterizer(GSState* state, int id, int threads)
, m_fbco(NULL)
, m_zbco(NULL)
{
m_tc = (TextureCache*)VirtualAlloc(NULL, sizeof(TextureCache), MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
m_tc->dirty = true;
InvalidateTextureCache();
// w00t :P
#define InitDS_IIP(iFPSM, iZPSM, iZTST, iIIP) \
@@ -73,8 +68,6 @@ GSRasterizer::GSRasterizer(GSState* state, int id, int threads)
GSRasterizer::~GSRasterizer()
{
VirtualFree(m_tc, 0, MEM_RELEASE);
POSITION pos = m_comap.GetHeadPosition();
while(pos)
@@ -92,17 +85,7 @@ GSRasterizer::~GSRasterizer()
m_comap.RemoveAll();
}
// Resets the texture cache bookkeeping, but only when the cache is flagged dirty:
// clears the hash and the dirty flag and zero-fills the page table.
void GSRasterizer::InvalidateTextureCache()
{
	// A clean cache has nothing to reset.
	if(!m_tc->dirty)
	{
		return;
	}

	m_tc->hash = 0;
	m_tc->dirty = false;

	memset(m_tc->page, 0, sizeof(m_tc->page));
}
int GSRasterizer::Draw(Vertex* vertices, int count)
int GSRasterizer::Draw(Vertex* vertices, int count, DWORD* texture)
{
GSDrawingEnvironment& env = m_state->m_env;
GSDrawingContext* context = m_state->m_context;
@@ -240,33 +223,8 @@ int GSRasterizer::Draw(Vertex* vertices, int count)
if(PRIM->TME)
{
DWORD hash = context->TEX0.ai32[0]; // TBP0, TBW, PSM
const DWORD* clut = m_state->m_mem.m_clut;
const DWORD pal = GSLocalMemory::m_psm[context->TEX0.PSM].pal;
if(m_tc->hash == hash)
{
if(pal > 0)
{
if(!GSVector4i::compare(m_tc->clut, clut, pal * sizeof(clut[0])))
{
m_tc->hash = 0;
}
}
}
if(m_tc->hash != hash)
{
InvalidateTextureCache();
m_tc->hash = hash;
if(pal > 0)
{
memcpy(m_tc->clut, clut, pal * sizeof(clut[0]));
}
}
m_texture = texture;
m_tw = max(3, context->TEX0.TW);
short tw = (short)(1 << context->TEX0.TW);
short th = (short)(1 << context->TEX0.TH);
@@ -327,7 +285,7 @@ int GSRasterizer::Draw(Vertex* vertices, int count)
m_slenv.t.max = m_slenv.t.max.xxxxl().xxxxh();
m_slenv.t.mask = m_slenv.t.mask.xxzz();
m_tw = (int)max(context->TEX0.TW, TEXTURE_CACHE_WIDTH);
// m_tw = (int)max(context->TEX0.TW, TEXTURE_CACHE_WIDTH);
}
//
@@ -373,11 +331,6 @@ int GSRasterizer::Draw(Vertex* vertices, int count)
__assume(0);
}
if(context->FRAME.Block() == context->TEX0.TBP0)
{
InvalidateTextureCache();
}
m_state->m_perfmon.Put(GSPerfMon::Fillrate, m_slenv.steps); // TODO: move this to the renderer, not thread safe here
return count;
@@ -701,23 +654,6 @@ bool GSRasterizer::DrawSolidRect(int left, int top, int right, int bottom, const
return true;
}
// Reads the cache tile that contains texel (x, y) into m_tc->texture.
// The tile is (1 << TEXTURE_CACHE_WIDTH) x (1 << TEXTURE_CACHE_HEIGHT) texels; the destination
// rows are (1 << m_tw) DWORDs apart. The number of bytes read is reported to the perf monitor.
void GSRasterizer::FetchTexture(int x, int y)
{
	const int tileW = 1 << TEXTURE_CACHE_WIDTH;
	const int tileH = 1 << TEXTURE_CACHE_HEIGHT;

	// Snap the coordinates down to the tile's top-left corner.
	x = x & ~(tileW - 1);
	y = y & ~(tileH - 1);

	CRect r(x, y, x + tileW, y + tileH);

	DWORD* dst = &m_tc->texture[(y << m_tw) + x];
	const int dstpitch = (1 << m_tw) * 4;

	// Dispatch through the read-texture member pointer selected for this scanline environment.
	(m_state->m_mem.*m_slenv.rtx)(r, (BYTE*)dst, dstpitch, m_state->m_context->TEX0, m_state->m_env.TEXA);

	m_state->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * 4);
}
void GSRasterizer::SetupColumnOffset()
{
GSDrawingContext* context = m_state->m_context;
@@ -960,15 +896,10 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
continue;
}
FetchTexel(uv0.u16[i], uv0.u16[i + 4]);
FetchTexel(uv1.u16[i], uv0.u16[i + 4]);
FetchTexel(uv0.u16[i], uv1.u16[i + 4]);
FetchTexel(uv1.u16[i], uv1.u16[i + 4]);
GSVector4 c00(ReadTexelNoFetch(uv0.u16[i], uv0.u16[i + 4]));
GSVector4 c01(ReadTexelNoFetch(uv1.u16[i], uv0.u16[i + 4]));
GSVector4 c10(ReadTexelNoFetch(uv0.u16[i], uv1.u16[i + 4]));
GSVector4 c11(ReadTexelNoFetch(uv1.u16[i], uv1.u16[i + 4]));
GSVector4 c00(ReadTexel(uv0.u16[i], uv0.u16[i + 4]));
GSVector4 c01(ReadTexel(uv1.u16[i], uv0.u16[i + 4]));
GSVector4 c10(ReadTexel(uv0.u16[i], uv1.u16[i + 4]));
GSVector4 c11(ReadTexel(uv1.u16[i], uv1.u16[i + 4]));
c00 = c00.lerp(c01, uff.v[i]);
c10 = c10.lerp(c11, uff.v[i]);
@@ -1160,7 +1091,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
s = s.blend(d, fm);
}
m_state->m_mem.WriteFrameAndZBufX_NOSSE4(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels);
m_state->m_mem.WriteFrameAndZBufX(fpsm, fa, fm, s, ztst > 0 ? zpsm : 3, za, zm, zs, pixels);
}
while(0);
@@ -1212,10 +1143,11 @@ GSRasterizerMT::~GSRasterizerMT()
}
}
void GSRasterizerMT::BeginDraw(Vertex* vertices, int count)
void GSRasterizerMT::BeginDraw(Vertex* vertices, int count, DWORD* texture)
{
m_vertices = vertices;
m_count = count;
m_texture = texture;
InterlockedBitTestAndSet(m_sync, m_id);
}
@@ -1233,7 +1165,7 @@ DWORD GSRasterizerMT::ThreadProc()
{
if(*m_sync & (1 << m_id))
{
Draw(m_vertices, m_count);
Draw(m_vertices, m_count, m_texture);
InterlockedBitTestAndReset(m_sync, m_id);
}

View File

@@ -43,6 +43,9 @@ protected:
int m_id;
int m_threads;
DWORD* m_texture;
DWORD m_tw;
private:
struct ColumnOffset
{
@@ -119,17 +122,6 @@ private:
ScanlineEnvironment m_slenv;
bool m_solidrect;
struct TextureCache
{
DWORD texture[1024 * 1024];
DWORD clut[256];
DWORD page[1 << (10 - TEXTURE_CACHE_HEIGHT)];
DWORD hash;
bool dirty;
};
TextureCache* m_tc;
void SetupColumnOffset();
template<bool pos, bool tex, bool col>
@@ -148,34 +140,9 @@ private:
template<DWORD sel>
void DrawScanlineEx(int top, int left, int right, const Vertex& v);
int m_tw;
void FetchTexture(int x, int y);
__forceinline void FetchTexel(int x, int y)
{
DWORD i = 1 << (x >> (10 - TEXTURE_CACHE_WIDTH));
DWORD j = y >> TEXTURE_CACHE_HEIGHT;
if((m_tc->page[j] & i) == 0)
{
m_tc->page[j] |= i;
m_tc->dirty = true;
FetchTexture(x, y);
}
}
__forceinline DWORD ReadTexelNoFetch(int x, int y)
{
return m_tc->texture[(y << m_tw) + x];
}
__forceinline DWORD ReadTexel(int x, int y)
{
FetchTexel(x, y);
return ReadTexelNoFetch(x, y);
return m_texture[(y << m_tw) + x];
}
__forceinline GSVector4i Wrap(const GSVector4i& t)
@@ -198,8 +165,7 @@ public:
GSRasterizer(GSState* state, int id = 0, int threads = 0);
virtual ~GSRasterizer();
int Draw(Vertex* v, int count);
void InvalidateTextureCache();
int Draw(Vertex* v, int count, DWORD* texture);
};
class GSRasterizerMT : public GSRasterizer
@@ -219,5 +185,5 @@ public:
GSRasterizerMT(GSState* state, int id, int threads, long* sync);
virtual ~GSRasterizerMT();
void BeginDraw(Vertex* vertices, int count);
void BeginDraw(Vertex* vertices, int count, DWORD* texture);
};

View File

@@ -929,15 +929,10 @@ void GSRasterizer::DrawScanlineEx(int top, int left, int right, const Vertex& v)
continue;
}
FetchTexel(uv0.u16[i], uv0.u16[i + 4]);
FetchTexel(uv1.u16[i], uv0.u16[i + 4]);
FetchTexel(uv0.u16[i], uv1.u16[i + 4]);
FetchTexel(uv1.u16[i], uv1.u16[i + 4]);
GSVector4 c00(ReadTexelNoFetch(uv0.u16[i], uv0.u16[i + 4]));
GSVector4 c01(ReadTexelNoFetch(uv1.u16[i], uv0.u16[i + 4]));
GSVector4 c10(ReadTexelNoFetch(uv0.u16[i], uv1.u16[i + 4]));
GSVector4 c11(ReadTexelNoFetch(uv1.u16[i], uv1.u16[i + 4]));
GSVector4 c00(ReadTexel(uv0.u16[i], uv0.u16[i + 4]));
GSVector4 c01(ReadTexel(uv1.u16[i], uv0.u16[i + 4]));
GSVector4 c10(ReadTexel(uv0.u16[i], uv1.u16[i + 4]));
GSVector4 c11(ReadTexel(uv1.u16[i], uv1.u16[i + 4]));
c00 = c00.lerp(c01, uff.v[i]);
c10 = c10.lerp(c11, uff.v[i]);

View File

@@ -24,6 +24,7 @@
#include "GSRenderer.h"
#include "GSVertexSW.h"
#include "GSRasterizer.h"
#include "GSTextureCacheSW.h"
extern const GSVector4 g_pos_scale;
@@ -35,9 +36,35 @@ class GSRendererSW : public GSRendererT<Device, GSVertexSW>
protected:
long* m_sync;
long m_threads;
GSTextureCacheSW* m_tc;
GSRasterizer* m_rst;
CAtlList<GSRasterizerMT*> m_rmt;
Texture m_texture[2];
bool m_reset;
// Flags the software texture cache for a flush and then runs the base class Reset().
// The cache itself is not touched here: only m_reset is raised, and VSync() performs
// the actual m_tc->RemoveAll() when it sees the flag.
void Reset()
{
// TODO: GSreset can come from the main thread too => crash
// m_tc->RemoveAll();
// (the direct call above stays disabled because of that TODO; the flag below defers the flush)
m_reset = true;
__super::Reset();
}
// Per-frame housekeeping for the software texture cache.
// After the base class has handled the vsync, every cached texture is aged by one frame
// and, if Reset() asked for it, the whole cache is emptied.
void VSync(int field)
{
	__super::VSync(field);

	m_tc->IncAge();

	if(!m_reset)
	{
		// no flush pending
		return;
	}

	// flush requested by Reset(), carried out here instead of inside Reset() itself
	m_tc->RemoveAll();

	m_reset = false;
}
void ResetDevice()
{
@@ -197,9 +224,66 @@ protected:
void Draw()
{
// TODO: lot to optimize here
DWORD* texture = NULL;
if(PRIM->TME)
{
m_mem.m_clut.Read32(m_context->TEX0, m_env.TEXA);
int w = 1 << m_context->TEX0.TW;
int h = 1 << m_context->TEX0.TH;
int wms = m_context->CLAMP.WMS;
int wmt = m_context->CLAMP.WMT;
int minu = (int)m_context->CLAMP.MINU;
int minv = (int)m_context->CLAMP.MINV;
int maxu = (int)m_context->CLAMP.MAXU;
int maxv = (int)m_context->CLAMP.MAXV;
CRect r = CRect(0, 0, w, h);
switch(wms)
{
case 0: // TODO
break;
case 1: // TODO
break;
case 2:
if(r.left < minu) r.left = minu;
if(r.right > maxu + 1) r.right = maxu + 1;
break;
case 3:
r.left = maxu;
r.right = r.left + (minu + 1);
break;
default:
__assume(0);
}
switch(wmt)
{
case 0: // TODO
break;
case 1: // TODO
break;
case 2:
if(r.top < minv) r.top = minv;
if(r.bottom > maxv + 1) r.bottom = maxv + 1;
break;
case 3:
r.top = maxv;
r.bottom = r.top + (minv + 1);
break;
default:
__assume(0);
}
r &= CRect(0, 0, w, h);
texture = m_tc->Lookup(m_context->TEX0, m_env.TEXA, &r);
if(!texture) {ASSERT(0); return;}
}
if(s_dump)
@@ -221,12 +305,12 @@ protected:
{
GSRasterizerMT* r = m_rmt.GetNext(pos);
r->BeginDraw(m_vertices, m_count);
r->BeginDraw(m_vertices, m_count, texture);
}
// 1st thread is this thread
int prims = m_rst->Draw(m_vertices, m_count);
int prims = m_rst->Draw(m_vertices, m_count, texture);
// wait for the other threads to finish
@@ -234,6 +318,50 @@ protected:
{
_mm_pause();
}
// TODO
{
CRect r;
r.left = max(m_context->SCISSOR.SCAX0, 0);
r.top = max(m_context->SCISSOR.SCAY0, 0);
r.right = min(m_context->SCISSOR.SCAX1 + 1, m_context->FRAME.FBW * 64);
r.bottom = min(m_context->SCISSOR.SCAY1 + 1, 4096);
GSVector4 minv(+1e10f);
GSVector4 maxv(-1e10f);
for(int i = 0, j = m_count; i < j; i++)
{
GSVector4 p = m_vertices[i].p;
minv = minv.minv(p);
maxv = maxv.maxv(p);
}
GSVector4i v(minv.xyxy(maxv));
r.left = max(r.left, min(r.right, v.x));
r.top = max(r.top, min(r.bottom, v.y));
r.right = min(r.right, max(r.left, v.z));
r.bottom = min(r.bottom, max(r.top, v.w));
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.DBP = m_context->FRAME.Block();
BITBLTBUF.DBW = m_context->FRAME.FBW;
BITBLTBUF.DPSM = m_context->FRAME.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
if(m_context->DepthWrite())
{
BITBLTBUF.DBP = m_context->ZBUF.Block();
BITBLTBUF.DPSM = m_context->ZBUF.PSM;
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
}
m_perfmon.Put(GSPerfMon::Prim, prims);
m_perfmon.Put(GSPerfMon::Draw, 1);
@@ -250,21 +378,7 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r)
{
InvalidateTextureCache();
}
void InvalidateTextureCache()
{
m_rst->InvalidateTextureCache();
POSITION pos = m_rmt.GetHeadPosition();
while(pos)
{
GSRasterizerMT* r = m_rmt.GetNext(pos);
r->InvalidateTextureCache();
}
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
public:
@@ -274,6 +388,8 @@ public:
m_sync = (long*)_aligned_malloc(sizeof(*m_sync), 128); // get a whole cache line
m_threads = AfxGetApp()->GetProfileInt(_T("Settings"), _T("swthreads"), 1);
m_tc = new GSTextureCacheSW(this);
m_rst = new GSRasterizer(this, 0, m_threads);
for(int i = 1; i < m_threads; i++)
@@ -294,6 +410,8 @@ public:
virtual ~GSRendererSW()
{
delete m_tc;
delete m_rst;
while(!m_rmt.IsEmpty())

394
gsdx/GSTextureCacheSW.cpp Normal file
View File

@@ -0,0 +1,394 @@
/*
* Copyright (C) 2007 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "StdAfx.h"
#include "GSTextureCacheSW.h"
// static FILE* m_log = NULL;
// Binds the cache to the GS state it reads video memory from.
// No textures exist yet; Lookup() creates them on demand.
GSTextureCacheSW::GSTextureCacheSW(GSState* state)
	: m_state(state)
{
	// debug log, disabled:
	// m_log = _tfopen(_T("c:\\log.txt"), _T("w"));
}
// Frees every cached texture together with its page bookkeeping.
GSTextureCacheSW::~GSTextureCacheSW()
{
	// debug log, disabled:
	// fclose(m_log);

	RemoveAll();
}
// Returns the 32-bit texel buffer of the cached texture described by TEX0/TEXA, or NULL on failure.
//
// TEX0, TEXA - texture registers identifying the texture
// r          - optional texel rectangle handed on to GSTexture::Update(), which then only fetches
//              the pages touching it; NULL lets Update() consider the whole texture
//
// A cached entry is reused when TBP0, TBW, PSM, TW and TH match and, in addition,
//  - for 16 and 24 bit formats: TCC matches, and TEXA matches as well when TCC == 1
//  - for paletted formats: the stored palette equals the current clut
// Otherwise a new entry is created and one GSTexturePage per covered page is linked into m_p2t,
// which is how InvalidateVideoMem() finds the textures affected by a write.
DWORD* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
{
	GSLocalMemory& mem = m_state->m_mem;

	mem.m_clut.Read32(TEX0, TEXA);

	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];

	const DWORD* clut = mem.m_clut;

	// NOTE(review): presumably TBP0 >> 5 is the page containing the texture base, i.e. the page
	// the first tile of every matching entry was registered with - confirm against PageNumber*()
	const CAtlList<GSTexturePage*>& t2p = m_p2t[TEX0.TBP0 >> 5];

	// fprintf(m_log, "lu %05x %d %d (%d) ", TEX0.TBP0, TEX0.TBW, TEX0.PSM, t2p.GetCount());
	// if(r) fprintf(m_log, "(%d %d %d %d) ", r->left, r->top, r->right, r->bottom);

	GSTexture* t = NULL;

	POSITION pos = t2p.GetHeadPosition();

	while(pos)
	{
		GSTexture* t2 = t2p.GetNext(pos)->t;

		// location, format and size have to be identical

		if(t2->m_TEX0.TBP0 != TEX0.TBP0 || t2->m_TEX0.TBW != TEX0.TBW || t2->m_TEX0.PSM != TEX0.PSM || t2->m_TEX0.TW != TEX0.TW || t2->m_TEX0.TH != TEX0.TH)
		{
			continue;
		}

		// 16/24 bit formats also depend on TCC, and on TEXA when TCC == 1

		if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || (TEX0.TCC == 1 && !(t2->m_TEXA == (GSVector4i)TEXA).alltrue())))
		{
			continue;
		}

		// paletted formats have to be converted with the same palette

		if(psm.pal > 0 && !GSVector4i::compare(t2->m_clut, clut, psm.pal * sizeof(clut[0])))
		{
			continue;
		}

		// fprintf(m_log, "cache hit\n");

		t = t2;

		t->m_age = 0; // used again, restart the countdown of IncAge()

		break;
	}

	if(t == NULL)
	{
		// fprintf(m_log, "cache miss\n");

		t = new GSTexture(m_state);

		t->m_pos = m_textures.AddTail(t);

		int tw = 1 << TEX0.TW;
		int th = 1 << TEX0.TH;

		DWORD bp = TEX0.TBP0;
		DWORD bw = TEX0.TBW;

		// register one tile per page; (row, col) = (j, i) is the tile position inside the texture

		for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy)
		{
			DWORD page = psm.pgn(0, y, bp, bw);

			// page < MAX_PAGES keeps the index inside m_p2t

			for(int i = 0, x = 0; x < tw && page < MAX_PAGES; i++, x += psm.pgs.cx, page++)
			{
				GSTexturePage* p = new GSTexturePage();

				p->t = t;
				p->row = j;
				p->col = i;

				// remember list + position so ~GSTexture can unlink the tile again

				GSTexturePageEntry* p2te = new GSTexturePageEntry();

				p2te->p2t = &m_p2t[page];
				p2te->pos = m_p2t[page].AddHead(p);

				t->m_p2te.AddTail(p2te);

				t->m_maxpages++;
			}
		}
	}

	if(!t->Update(TEX0, TEXA, r))
	{
		// Update() only fails when allocating the buffers of a texture fails

		m_textures.RemoveAt(t->m_pos);

		delete t;

		// a literal percent sign has to be written as "%%"; the former "%\n" was an invalid conversion specification
		printf("!@#$%%\n"); // memory allocation may fail if the game is too hungry

		return NULL;
	}

	return t->m_texture;
}
void GSTextureCacheSW::RemoveAll()
{
POSITION pos = m_textures.GetHeadPosition();
while(pos)
{
delete m_textures.GetNext(pos);
}
m_textures.RemoveAll();
for(int i = 0; i < MAX_PAGES; i++)
{
CAtlList<GSTexturePage*>& t2p = m_p2t[i];
ASSERT(t2p.IsEmpty());
POSITION pos = t2p.GetHeadPosition();
while(pos)
{
delete t2p.GetNext(pos);
}
t2p.RemoveAll();
}
}
void GSTextureCacheSW::IncAge()
{
POSITION pos = m_textures.GetHeadPosition();
while(pos)
{
POSITION cur = pos;
GSTexture* t = m_textures.GetNext(pos);
if(++t->m_age > 3)
{
m_textures.RemoveAt(cur);
delete t;
}
}
}
void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[BITBLTBUF.DPSM];
CRect r2;
r2.left = r.left & ~(psm.pgs.cx - 1);
r2.top = r.top & ~(psm.pgs.cy - 1);
r2.right = (r.right + (psm.pgs.cx - 1)) & ~(psm.pgs.cx - 1);
r2.bottom = (r.bottom + (psm.pgs.cy - 1)) & ~(psm.pgs.cy - 1);
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
// fprintf(m_log, "ivm %05x %d %d (%d %d %d %d)\n", bp, bw, BITBLTBUF.DPSM, r2.left, r2.top, r2.right, r2.bottom);
for(int y = r2.top; y < r2.bottom; y += psm.pgs.cy)
{
DWORD page = psm.pgn(r2.left, y, bp, bw);
for(int x = r2.left; x < r2.right && page < MAX_PAGES; x += psm.pgs.cx, page++)
{
const CAtlList<GSTexturePage*>& t2p = m_p2t[page];
POSITION pos = t2p.GetHeadPosition();
while(pos)
{
GSTexturePage* p = t2p.GetNext(pos);
DWORD flag = 1 << p->col;
if((p->t->m_valid[p->row] & flag) == 0)
{
continue;
}
if(GSUtil::HasSharedBits(BITBLTBUF.DPSM, p->t->m_TEX0.PSM))
{
p->t->m_valid[p->row] &= ~flag;
p->t->m_pages--;
// fprintf(m_log, "ivm hit %05x %d %d (%d %d) (%d)", p->t->m_TEX0.TBP0, p->t->m_TEX0.TBW, p->t->m_TEX0.PSM, p->row, p->col, p->t->m_pages);
// if(p->t->m_pages == 0) fprintf(m_log, " *");
// fprintf(m_log, "\n");
}
}
}
}
}
//
// Creates an empty cache entry: no buffers allocated, no pages registered, nothing valid.
// The texel and palette buffers are allocated by the first Update().
GSTextureCacheSW::GSTexture::GSTexture(GSState* state)
	: m_state(state)
	, m_texture(NULL)
	, m_clut(NULL)
	, m_maxpages(0)
	, m_pages(0)
	, m_pos(NULL)
	, m_age(0)
{
	// no page of any row has been fetched yet

	for(size_t i = 0; i < sizeof(m_valid) / sizeof(m_valid[0]); i++)
	{
		m_valid[i] = 0;
	}
}
// Frees the texel and palette buffers and unlinks every page tile this texture registered
// in the per-page lists of the cache.
GSTextureCacheSW::GSTexture::~GSTexture()
{
	if(m_texture != NULL)
	{
		_aligned_free(m_texture);
	}

	if(m_clut != NULL)
	{
		_aligned_free(m_clut);
	}

	while(!m_p2te.IsEmpty())
	{
		GSTexturePageEntry* entry = m_p2te.RemoveHead();

		// entry->pos is the position of our tile inside the page list entry->p2t

		GSTexturePage* tile = entry->p2t->GetAt(entry->pos);

		ASSERT(tile->t == this);

		entry->p2t->RemoveAt(entry->pos);

		delete tile;

		delete entry;
	}
}
// Fetches the pages of the texture that are not valid yet from local memory into m_texture.
//
// TEX0, TEXA - texture registers used for reading; stored in m_TEX0/m_TEXA
// r          - optional texel rectangle; when given it is widened to whole pages and only pages
//              inside it are fetched, NULL considers every page of the texture
//
// Returns false only when allocating the texel or palette buffer fails, true otherwise
// (including the case where every page is already valid and nothing has to be done).
//
// m_valid[row] holds one bit per page column; a set bit means the page is present in m_texture.
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r)
{
	if(m_pages == m_maxpages)
	{
		// everything is up to date

		return true;
	}

	m_TEX0 = TEX0;
	m_TEXA = TEXA;

	GSLocalMemory& mem = m_state->m_mem;

	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];

	const DWORD* clut = mem.m_clut;

	int tw = 1 << TEX0.TW;
	int th = 1 << TEX0.TH;

	// never go below one block of the format

	if(tw < psm.bs.cx) tw = psm.bs.cx;
	if(th < psm.bs.cy) th = psm.bs.cy;

	if(m_texture == NULL)
	{
		// fprintf(m_log, "up new (%d %d)\n", tw, th);

		m_texture = (DWORD*)_aligned_malloc(tw * th * sizeof(DWORD), 16);

		if(m_texture == NULL)
		{
			return false;
		}

		m_clut = (DWORD*)_aligned_malloc(256 * sizeof(DWORD), 16);

		if(m_clut == NULL)
		{
			// m_texture is released by the destructor

			return false;
		}

		if(psm.pal > 0)
		{
			// keep the palette the texture is converted with; Lookup() compares against it

			memcpy(m_clut, clut, psm.pal * sizeof(clut[0]));
		}
	}

	// requested rectangle rounded outwards to page boundaries (only meaningful when r != NULL)

	CRect r2;

	if(r)
	{
		r2.left = r->left & ~(psm.pgs.cx - 1);
		r2.top = r->top & ~(psm.pgs.cy - 1);
		r2.right = (r->right + (psm.pgs.cx - 1)) & ~(psm.pgs.cx - 1);
		r2.bottom = (r->bottom + (psm.pgs.cy - 1)) & ~(psm.pgs.cy - 1);
	}

	DWORD* texture = m_texture;

	DWORD pitch = 1 << max(3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_texture

	// Value of m_valid[j] when every page column of a row is present. m_valid[] is indexed by page
	// column, so the mask is built from the number of columns the texture spans. It used to be
	// pitch - 1, a texel count, which could equal a partially valid row of a wide texture and
	// make the loop below skip pages that still had to be fetched.

	const DWORD cols = (DWORD)((tw + psm.pgs.cx - 1) / psm.pgs.cx);

	const DWORD mask = cols < 32 ? ((DWORD)1 << cols) - 1 : 0xffffffff;

	for(int j = 0, y = 0; y < th; j++, y += psm.pgs.cy, texture += pitch * psm.pgs.cy)
	{
		if(m_valid[j] == mask)
		{
			// whole row already fetched

			continue;
		}

		if(r)
		{
			if(y < r2.top) continue;
			if(y >= r2.bottom) break;
		}

		DWORD page = psm.pgn(0, y, TEX0.TBP0, TEX0.TBW);

		for(int i = 0, x = 0; x < tw && page < MAX_PAGES; i++, x += psm.pgs.cx, page++)
		{
			if(r)
			{
				if(x < r2.left) continue;
				if(x >= r2.right) break;
			}

			DWORD flag = 1 << i;

			if(m_valid[j] & flag)
			{
				continue;
			}

			m_valid[j] |= flag;

			m_pages++;

			ASSERT(m_pages <= m_maxpages);

			// texel rectangle of this page, clipped to the texture size

			CRect pr;

			pr.left = x;
			pr.top = y;
			pr.right = min(x + psm.pgs.cx, tw);
			pr.bottom = min(y + psm.pgs.cy, th);

			// fprintf(m_log, "up fetch (%d %d) (%d %d %d %d)\n", j, i, pr.left, pr.top, pr.right, pr.bottom);

			// 'texture' points at the first texel of row y, pitch is in texels, hence * 4 for bytes

			(mem.*psm.rtx)(pr, (BYTE*)&texture[x], pitch * 4, TEX0, TEXA);

			m_state->m_perfmon.Put(GSPerfMon::Unswizzle, pr.Width() * pr.Height() * 4);
		}
	}

	return true;
}

83
gsdx/GSTextureCacheSW.h Normal file
View File

@@ -0,0 +1,83 @@
/*
* Copyright (C) 2007 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSRenderer.h"
#define MAX_PAGES 512
// Texture cache of the software renderer.
// Keeps 32-bit texel buffers read from GS local memory (GSTexture) and, for each of the MAX_PAGES
// video-memory pages, a list of the texture tiles registered with that page (m_p2t), so a write to
// video memory can mark just the affected tiles as stale.
class GSTextureCacheSW
{
public:
class GSTexture;
class GSTexturePage;
// One page-sized tile of a cached texture; lives in the m_p2t list of the page it was created for.
class GSTexturePage
{
public:
GSTexture* t; // texture the tile belongs to
DWORD row, col; // tile position inside the texture in page units: m_valid[row], bit (1 << col)
};
// Back reference from a texture to one of its tiles, used by ~GSTexture to unlink the tile.
class GSTexturePageEntry
{
public:
CAtlList<GSTexturePage*>* p2t; // page list holding the tile
POSITION pos; // position of the tile inside that list
};
// A cached texture.
class GSTexture
{
public:
GSState* m_state; // source of local memory and the perf monitor
GIFRegTEX0 m_TEX0; // registers of the last Update() that had work to do
GIFRegTEXA m_TEXA;
DWORD* m_texture; // texel buffer, allocated with _aligned_malloc by the first Update()
DWORD* m_clut; // 256 entry palette buffer; filled at allocation time when the format has a palette
DWORD m_valid[32]; // per page row: one bit per page column, set while the page is present in m_texture
DWORD m_maxpages; // number of tiles registered by Lookup()
DWORD m_pages; // number of tiles currently valid; Update() returns early when equal to m_maxpages
CAtlList<GSTexturePageEntry*> m_p2te; // one entry per registered tile
POSITION m_pos; // position of this texture in GSTextureCacheSW::m_textures
DWORD m_age; // incremented by IncAge(), reset to 0 by a Lookup() hit; removed once above 3
explicit GSTexture(GSState* state);
virtual ~GSTexture();
// Fetches the not yet valid pages (restricted to r when given); false if buffer allocation fails.
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r = NULL);
};
protected:
GSState* m_state;
CAtlList<GSTexture*> m_textures; // every cached texture; the cache deletes them
CAtlList<GSTexturePage*> m_p2t[MAX_PAGES]; // page number -> tiles registered with that page
public:
GSTextureCacheSW(GSState* state);
virtual ~GSTextureCacheSW();
// Returns the texel buffer for TEX0/TEXA, creating or refreshing the entry; NULL on failure.
DWORD* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const CRect* r = NULL);
// Deletes all textures and clears the page lists.
void RemoveAll();
// Ages all textures by one step and deletes those not hit by Lookup() for too long.
void IncAge();
// Marks the valid tiles overlapping rectangle r of the written buffer as stale.
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r);
};

View File

@@ -792,7 +792,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
s_gs->PRIM->PRIM = GS_SPRITE;
ras->Draw(vertices, count);
*/
// triangle
vertices[0].p = GSVector4(0, 0, 0, 0);
@@ -813,7 +813,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
s_gs->PRIM->PRIM = GS_TRIANGLELIST;
ras->Draw(vertices, 999);
*/
//
_aligned_free(vertices);

View File

@@ -1411,6 +1411,10 @@
RelativePath=".\GSTextureCache9.cpp"
>
</File>
<File
RelativePath=".\GSTextureCacheSW.cpp"
>
</File>
<File
RelativePath=".\GSTextureFX10.cpp"
>
@@ -1733,6 +1737,10 @@
RelativePath=".\GSTextureCache9.h"
>
</File>
<File
RelativePath=".\GSTextureCacheSW.h"
>
</File>
<File
RelativePath=".\GSTextureFX10.h"
>