This commit is contained in:
gabest
2008-07-08 16:59:38 +00:00
parent f559ee9342
commit 32e7a34771
10 changed files with 392 additions and 201 deletions

View File

@@ -68,7 +68,7 @@ public:
{
GSVector4i v4((int)mask);
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
if(mask == 0xff000000 || mask == 0x00ffffff)
{
@@ -87,7 +87,7 @@ public:
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, v4);
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, v4);
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
}
@@ -1090,7 +1090,7 @@ public:
{
for(int j = 0; j < 16; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1112,7 +1112,7 @@ public:
{
for(int j = 0; j < 16; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1134,7 +1134,7 @@ public:
{
for(int j = 0; j < 16; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1156,7 +1156,7 @@ public:
{
for(int j = 0; j < 16; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1178,7 +1178,7 @@ public:
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1201,7 +1201,7 @@ public:
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1226,7 +1226,7 @@ public:
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1249,7 +1249,7 @@ public:
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1274,7 +1274,7 @@ public:
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1297,7 +1297,7 @@ public:
{
for(int j = 0; j < 8; j++, dst += dstpitch)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i* s = (GSVector4i*)src;
GSVector4i* d = (GSVector4i*)dst;
@@ -1318,8 +1318,6 @@ public:
}
}
// TODO: UnpackAndWrite*
__forceinline static void UnpackAndWriteBlock24(BYTE* src, int srcpitch, BYTE* dst)
{
#if _M_SSE >= 0x200
@@ -1755,7 +1753,7 @@ public:
__forceinline static void ReadAndExpandBlock8_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2, v3;
@@ -1825,7 +1823,7 @@ public:
__forceinline static void ReadAndExpandBlock4_32(BYTE* src, BYTE* dst, int dstpitch, UINT64* pal)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2, v3;
@@ -1917,7 +1915,7 @@ public:
__forceinline static void ReadAndExpandBlock8H_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2, v3;
@@ -1968,7 +1966,7 @@ public:
__forceinline static void ReadAndExpandBlock4HL_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2, v3;
@@ -2019,7 +2017,7 @@ public:
__forceinline static void ReadAndExpandBlock4HH_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
{
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i v0, v1, v2, v3;

View File

@@ -30,11 +30,13 @@
#define ASSERT_BLOCK(r, w, h) \
ASSERT((r).Width() >= w && (r).Height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \
#define FOREACH_BLOCK_START(r, w, h, t) \
for(int y = (r).top; y < (r).bottom; y += (h)) \
{ ASSERT_BLOCK(r, w, h); \
BYTE* ptr = dst + (y-(r).top)*dstpitch; \
for(int x = (r).left; x < (r).right; x += (w)) \
#define FOREACH_BLOCK_START(w, h, bpp) \
DWORD bp = TEX0.TBP0; \
DWORD bw = TEX0.TBW; \
int offset = dstpitch * h - (r.right - r.left) * bpp / 8; \
for(int y = r.top; y < r.bottom; y += h, dst += offset) \
{ ASSERT_BLOCK(r, w, h); \
for(int x = r.left; x < r.right; x += w, dst += w * bpp / 8) \
{ \
#define FOREACH_BLOCK_END }}
@@ -813,6 +815,9 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 4;
int th = len / srcpitch;
@@ -832,14 +837,14 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < twa; x += 8)
{
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, ty, bp, bw)], src + (x - tx) * 4, srcpitch);
}
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel32(x, ty, ((DWORD*)src)[x - tx], bp, bw);
}
}
}
@@ -857,14 +862,14 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < twa; x += 8)
{
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32(x, ty & ~7, bp, bw)], src + (x - tx) * 4, srcpitch);
}
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel32(x, ty, ((DWORD*)src)[x - tx], bp, bw);
}
}
}
@@ -882,7 +887,7 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 8)
{
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
}
}
}
@@ -892,7 +897,7 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 8)
{
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
}
}
}
@@ -905,6 +910,9 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 3;
int th = len / srcpitch;
@@ -924,7 +932,7 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
}
}
@@ -936,6 +944,9 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
int th = len / srcpitch;
@@ -955,14 +966,14 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < twa; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -980,14 +991,14 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < twa; x += 16)
{
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1005,7 +1016,7 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1015,7 +1026,7 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1028,6 +1039,9 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
int th = len / srcpitch;
@@ -1047,14 +1061,14 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < twa; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16S(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1072,14 +1086,14 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < twa; x += 16)
{
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16S(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16S(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16S(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1097,7 +1111,7 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1107,7 +1121,7 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1120,6 +1134,9 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX;
int th = len / srcpitch;
@@ -1139,14 +1156,14 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < twa; x += 16)
{
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, ty, bp, bw)], src + (x - tx), srcpitch);
}
for(int i = 0; i < 16; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel8(x, ty, src[x - tx], bp, bw);
}
}
}
@@ -1164,14 +1181,14 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < twa; x += 16)
{
WriteColumn8<false>(ty, (BYTE*)&m_vm8[BlockAddress8(x, ty & ~15, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
WriteColumn8<false>(ty, (BYTE*)&m_vm8[BlockAddress8(x, ty & ~15, bp, bw)], src + (x - tx), srcpitch);
}
for(int i = 0; i < 4; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel8(x, ty, src[x - tx], bp, bw);
}
}
}
@@ -1189,7 +1206,7 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], src + (x - tx), srcpitch);
}
}
}
@@ -1199,7 +1216,7 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
WriteBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], src + (x - tx), srcpitch);
}
}
}
@@ -1212,6 +1229,9 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2;
int th = len / srcpitch;
@@ -1231,7 +1251,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < twa; x += 32)
{
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, ty, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
}
for(int i = 0; i < 16; i++, ty++, src += srcpitch)
@@ -1240,8 +1260,8 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
for(int x = twa; x < tw; x += 2, s++)
{
WritePixel4(x, ty, *s & 0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
WritePixel4(x + 1, ty, *s >> 4, BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel4(x, ty, *s & 0xf, bp, bw),
WritePixel4(x + 1, ty, *s >> 4, bp, bw);
}
}
}
@@ -1259,7 +1279,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < twa; x += 32)
{
WriteColumn4<false>(ty, (BYTE*)&m_vm8[BlockAddress4(x, ty & ~15, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
WriteColumn4<false>(ty, (BYTE*)&m_vm8[BlockAddress4(x, ty & ~15, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
}
for(int i = 0; i < 4; i++, ty++, src += srcpitch)
@@ -1268,8 +1288,8 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
for(int x = twa; x < tw; x += 2, s++)
{
WritePixel4(x, ty, *s & 0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
WritePixel4(x + 1, ty, *s >> 4, BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel4(x, ty, *s & 0xf, bp, bw),
WritePixel4(x + 1, ty, *s >> 4, bp, bw);
}
}
}
@@ -1287,7 +1307,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < tw; x += 32)
{
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
}
}
}
@@ -1297,7 +1317,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
{
for(int x = tx; x < tw; x += 32)
{
WriteBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
WriteBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
}
}
}
@@ -1310,6 +1330,9 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX;
int th = len / srcpitch;
@@ -1329,7 +1352,7 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock8H(src + (x - tx), srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
UnpackAndWriteBlock8H(src + (x - tx), srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
}
}
@@ -1341,6 +1364,9 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2;
int th = len / srcpitch;
@@ -1360,7 +1386,7 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
}
}
@@ -1372,6 +1398,9 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2;
int th = len / srcpitch;
@@ -1391,7 +1420,7 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
}
}
@@ -1403,6 +1432,9 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 4;
int th = len / srcpitch;
@@ -1422,14 +1454,14 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < twa; x += 8)
{
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, ty, bp, bw)], src + (x - tx) * 4, srcpitch);
}
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1447,14 +1479,14 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < twa; x += 8)
{
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32Z(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32Z(x, ty & ~7, bp, bw)], src + (x - tx) * 4, srcpitch);
}
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1472,7 +1504,7 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
}
}
}
@@ -1482,7 +1514,7 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
}
}
}
@@ -1495,6 +1527,9 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 3;
int th = len / srcpitch;
@@ -1514,7 +1549,7 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 8)
{
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)]);
}
}
@@ -1526,6 +1561,9 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
int th = len / srcpitch;
@@ -1545,14 +1583,14 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < twa; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1570,14 +1608,14 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < twa; x += 16)
{
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16Z(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16Z(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1595,7 +1633,7 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1605,7 +1643,7 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1618,6 +1656,9 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
{
if(TRXREG.RRW == 0) return;
DWORD bp = BITBLTBUF.DBP;
DWORD bw = BITBLTBUF.DBW;
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
int th = len / srcpitch;
@@ -1637,14 +1678,14 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
{
for(int x = tx; x < twa; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1662,14 +1703,14 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
{
for(int x = tx; x < twa; x += 16)
{
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16SZ(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16SZ(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
}
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
{
for(int x = twa; x < tw; x++)
{
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], bp, bw);
}
}
}
@@ -1687,7 +1728,7 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -1697,7 +1738,7 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
{
for(int x = tx; x < tw; x += 16)
{
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
}
}
}
@@ -2065,28 +2106,29 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBL
void GSLocalMemory::ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch);
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
if(TEXA.AEM)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
else
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@@ -2096,11 +2138,11 @@ void GSLocalMemory::ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRe
{
__declspec(align(16)) WORD block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 16)
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@@ -2109,11 +2151,11 @@ void GSLocalMemory::ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFR
{
__declspec(align(16)) WORD block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 16S)
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@@ -2122,9 +2164,9 @@ void GSLocalMemory::ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFReg
{
DWORD* pal = m_clut32;
FOREACH_BLOCK_START(r, 16, 16, 8)
FOREACH_BLOCK_START(16, 16, 32)
{
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2133,9 +2175,9 @@ void GSLocalMemory::ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFReg
{
UINT64* pal = m_clut64;
FOREACH_BLOCK_START(r, 32, 16, 4)
FOREACH_BLOCK_START(32, 16, 32)
{
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, TEX0.TBP0, TEX0.TBW) >> 1], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2144,9 +2186,9 @@ void GSLocalMemory::ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRe
{
DWORD* pal = m_clut32;
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2155,9 +2197,9 @@ void GSLocalMemory::ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFR
{
DWORD* pal = m_clut32;
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2166,18 +2208,18 @@ void GSLocalMemory::ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFR
{
DWORD* pal = m_clut32;
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
void GSLocalMemory::ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch);
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
@@ -2186,17 +2228,17 @@ void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
{
if(TEXA.AEM)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
else
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@@ -2206,11 +2248,11 @@ void GSLocalMemory::ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
{
__declspec(align(16)) WORD block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 16)
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@@ -2219,11 +2261,11 @@ void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, GIF
{
__declspec(align(16)) WORD block[16 * 8];
FOREACH_BLOCK_START(r, 16, 8, 16S)
FOREACH_BLOCK_START(16, 8, 32)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
ExpandBlock16(block, dst, dstpitch, TEXA);
}
FOREACH_BLOCK_END
}
@@ -2270,18 +2312,18 @@ void GSLocalMemory::ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, GIFRe
// Copies 16-bit texels from m_vm16 to the destination one block at a time:
// inside the FOREACH_BLOCK_START / FOREACH_BLOCK_END loop each block is
// located with BlockAddress16 and copied by ReadBlock16<true>, with dstpitch
// passed through unchanged. TEXA is not referenced in the body shown here.
// NOTE(review): the FOREACH_BLOCK_START call and the ReadBlock16 call each
// appear twice in a row (one form passing r / TEX0.TBP0 / ptr, the other
// bp / bw / dst). This looks like removed and added diff lines rendered
// together - confirm which form is current before compiling.
void GSLocalMemory::ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16)
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Same block-by-block copy as the plain 16-bit reader, but blocks are located
// with BlockAddress16S: each block is read from m_vm16 by ReadBlock16<true>
// with dstpitch passed through unchanged. TEXA is not referenced here.
// NOTE(review): the FOREACH_BLOCK_START call and the ReadBlock16 call each
// appear twice in a row (old-looking form with r / TEX0.TBP0 / ptr, new-looking
// form with bp / bw / dst) - likely interleaved diff lines; confirm which is
// current.
void GSLocalMemory::ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16S)
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
@@ -2292,9 +2334,9 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 16, 16, 8)
FOREACH_BLOCK_START(16, 16, 32)
{
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2304,11 +2346,11 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
__declspec(align(16)) BYTE block[16 * 16];
FOREACH_BLOCK_START(r, 16, 16, 8)
FOREACH_BLOCK_START(16, 16, 16)
{
ReadBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 16);
ReadBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 16);
ExpandBlock8_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
ExpandBlock8_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2320,9 +2362,9 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 32, 16, 4)
FOREACH_BLOCK_START(32, 16, 32)
{
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, TEX0.TBP0, TEX0.TBW) >> 1], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2332,11 +2374,11 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
__declspec(align(16)) BYTE block[(32 / 2) * 16];
FOREACH_BLOCK_START(r, 32, 16, 4)
FOREACH_BLOCK_START(32, 16, 16)
{
ReadBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], (BYTE*)block, sizeof(block) / 16);
ReadBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw)>>1], (BYTE*)block, sizeof(block) / 16);
ExpandBlock4_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
ExpandBlock4_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2348,9 +2390,9 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIF
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2360,11 +2402,11 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIF
__declspec(align(16)) DWORD block[8 * 8];
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 16)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock8H_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
ExpandBlock8H_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2376,9 +2418,9 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GI
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2388,11 +2430,11 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GI
__declspec(align(16)) DWORD block[8 * 8];
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 16)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock4HL_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
ExpandBlock4HL_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2404,9 +2446,9 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
{
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 32)
{
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2416,11 +2458,11 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
__declspec(align(16)) DWORD block[8 * 8];
FOREACH_BLOCK_START(r, 8, 8, 32)
FOREACH_BLOCK_START(8, 8, 16)
{
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
ExpandBlock4HH_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
ExpandBlock4HH_16(block, dst, dstpitch, pal);
}
FOREACH_BLOCK_END
}
@@ -2428,18 +2470,18 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
// Block-by-block copy of 16-bit data addressed through BlockAddress16Z:
// each block is read from m_vm16 by ReadBlock16<true> with dstpitch passed
// through unchanged. TEXA is not referenced in the body shown here.
// NOTE(review): the FOREACH_BLOCK_START call and the ReadBlock16 call each
// appear twice in a row (one form with r / TEX0.TBP0 / ptr, one with
// bp / bw / dst) - likely interleaved diff lines; confirm which is current.
void GSLocalMemory::ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16)
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}
// Block-by-block copy of 16-bit data addressed through BlockAddress16SZ:
// each block is read from m_vm16 by ReadBlock16<true> with dstpitch passed
// through unchanged. TEXA is not referenced in the body shown here.
// NOTE(review): the FOREACH_BLOCK_START call and the ReadBlock16 call each
// appear twice in a row (one form with r / TEX0.TBP0 / ptr, one with
// bp / bw / dst) - likely interleaved diff lines; confirm which is current.
void GSLocalMemory::ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
{
FOREACH_BLOCK_START(r, 16, 8, 16S)
FOREACH_BLOCK_START(16, 8, 16)
{
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], dst, dstpitch);
}
FOREACH_BLOCK_END
}

View File

@@ -130,12 +130,14 @@ public:
// Returns the base address of the page that contains pixel (x, y):
// page index = (bp >> 5) + (y >> 6) * (half of bw) + (x >> 7), then << 13
// (8192 = 128 * 64, matching the x >> 7 and y >> 6 shifts).
// NOTE(review): two return statements appear back to back. The first rounds
// an odd bw up via (bw + 1) >> 1; the second asserts bw is even and uses
// bw >> 1. This looks like a removed line and its replacement rendered
// together; as written the ASSERT and second return are unreachable -
// confirm which form is current.
static DWORD PageAddress8(int x, int y, DWORD bp, DWORD bw)
{
return ((bp >> 5) + (y >> 6) * ((bw + 1) >> 1) + (x >> 7)) << 13;
ASSERT((bw & 1) == 0);
return ((bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7)) << 13;
}
// Returns the base address of the page that contains pixel (x, y):
// page index = (bp >> 5) + (y >> 7) * (half of bw) + (x >> 7), then << 14
// (16384 = 128 * 128, matching the x >> 7 and y >> 7 shifts).
// NOTE(review): two return statements appear back to back. The first rounds
// an odd bw up via (bw + 1) >> 1; the second asserts bw is even and uses
// bw >> 1. This looks like a removed line and its replacement rendered
// together; as written the ASSERT and second return are unreachable -
// confirm which form is current.
static DWORD PageAddress4(int x, int y, DWORD bp, DWORD bw)
{
return ((bp >> 5) + (y >> 7) * ((bw + 1) >> 1) + (x >> 7)) << 14;
ASSERT((bw & 1) == 0);
return ((bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7)) << 14;
}
static DWORD BlockAddress32(int x, int y, DWORD bp, DWORD bw)
@@ -161,14 +163,16 @@ public:
// Returns the address of the 16x16 block that contains pixel (x, y).
// "page" is bp plus a row term and a column term, each masked with ~0x1f so
// it advances in steps of 32; blockTable8 then gives the block's position
// inside the page (indexed by (y >> 4) & 3 and (x >> 4) & 7). The sum is
// shifted left by 8 (256 = 16 * 16 entries per block).
// NOTE(review): "DWORD page" is declared twice - once with (bw+1)>>1 and once,
// after ASSERT((bw & 1) == 0), with bw >> 1. This looks like a removed line
// and its replacement rendered together; it will not compile as-is - confirm
// which form is current.
static DWORD BlockAddress8(int x, int y, DWORD bp, DWORD bw)
{
DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f);
ASSERT((bw & 1) == 0);
DWORD page = bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
return (page + block) << 8;
}
// Returns the address of the 32x16 block that contains pixel (x, y).
// "page" is bp plus a row term and a column term, each masked with ~0x1f so
// it advances in steps of 32; blockTable4 then gives the block's position
// inside the page (indexed by (y >> 4) & 7 and (x >> 5) & 3). The sum is
// shifted left by 9 (512 = 32 * 16 entries per block).
// NOTE(review): "DWORD page" is declared twice - once with (bw+1)>>1 and once,
// after ASSERT((bw & 1) == 0), with bw >> 1. This looks like a removed line
// and its replacement rendered together; it will not compile as-is - confirm
// which form is current.
static DWORD BlockAddress4(int x, int y, DWORD bp, DWORD bw)
{
DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f);
ASSERT((bw & 1) == 0);
DWORD page = bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
return (page + block) << 9;
}
@@ -223,16 +227,18 @@ public:
// Returns the address of pixel (x, y) computed from the block and column
// tables: the block base ((page + block) << 8) plus the in-block offset
// columnTable8[y & 15][x & 15]. The result is asserted to be below
// 1024*1024*4.
// NOTE(review): "DWORD page" is declared twice (old (bw + 1)>>1 form, then
// ASSERT + bw >> 1 form) and the range ASSERT appears both commented out and
// live. This looks like removed and added diff lines rendered together -
// confirm which form is current.
static DWORD PixelAddressOrg8(int x, int y, DWORD bp, DWORD bw)
{
DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f);
ASSERT((bw & 1) == 0);
DWORD page = bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
DWORD word = ((page + block) << 8) + columnTable8[y & 15][x & 15];
// ASSERT(word < 1024*1024*4);
ASSERT(word < 1024*1024*4);
return word;
}
static DWORD PixelAddressOrg4(int x, int y, DWORD bp, DWORD bw)
{
DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f);
ASSERT((bw & 1) == 0);
DWORD page = bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
DWORD word = ((page + block) << 9) + columnTable4[y & 15][x & 31];
ASSERT(word < 1024*1024*8);
@@ -289,14 +295,16 @@ public:
// Returns the address of pixel (x, y) using the precomputed pageOffset8 table:
// the page index ((bp >> 5) + (y >> 6) * half-of-bw + (x >> 7)) is shifted
// left by 13 and the in-page offset is looked up with the low 5 bits of bp,
// the low 6 bits of y and the low 7 bits of x.
// NOTE(review): "DWORD page" is declared twice (old (bw + 1)>>1 form, then
// ASSERT((bw & 1) == 0) + bw >> 1 form). This looks like a removed line and
// its replacement rendered together - confirm which form is current.
static __forceinline DWORD PixelAddress8(int x, int y, DWORD bp, DWORD bw)
{
DWORD page = (bp >> 5) + (y >> 6) * ((bw + 1)>>1) + (x >> 7);
ASSERT((bw & 1) == 0);
DWORD page = (bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7);
DWORD word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f];
return word;
}
// Returns the address of pixel (x, y) using the precomputed pageOffset4 table:
// the page index ((bp >> 5) + (y >> 7) * half-of-bw + (x >> 7)) is shifted
// left by 14 and the in-page offset is looked up with the low 5 bits of bp
// and the low 7 bits of both y and x.
// NOTE(review): "DWORD page" is declared twice (old (bw + 1)>>1 form, then
// ASSERT((bw & 1) == 0) + bw >> 1 form). This looks like a removed line and
// its replacement rendered together - confirm which form is current.
static __forceinline DWORD PixelAddress4(int x, int y, DWORD bp, DWORD bw)
{
DWORD page = (bp >> 5) + (y >> 7) * ((bw + 1)>>1) + (x >> 7);
ASSERT((bw & 1) == 0);
DWORD page = (bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7);
DWORD word = (page << 14) + pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f];
return word;
}
@@ -880,7 +888,7 @@ public:
{
case PSM_PSMCT32:
case PSM_PSMZ32:
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
c = addr.gather32_32(m_vm32);
#else
c = GSVector4i(
@@ -892,7 +900,7 @@ public:
break;
case PSM_PSMCT24:
case PSM_PSMZ24:
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
c = addr.gather32_32(m_vm32);
#else
c = GSVector4i(
@@ -907,7 +915,7 @@ public:
case PSM_PSMCT16S:
case PSM_PSMZ16:
case PSM_PSMZ16S:
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
c = addr.gather32_32(m_vm16);
#else
c = GSVector4i(
@@ -933,7 +941,7 @@ public:
switch(PSM)
{
case PSM_PSMZ32:
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
z = addr.gather32_32(m_vm32);
#else
z = GSVector4i(
@@ -944,7 +952,7 @@ public:
#endif
break;
case PSM_PSMZ24:
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
z = addr.gather32_32(m_vm32) & 0x00ffffff;
#else
z = GSVector4i(
@@ -957,7 +965,7 @@ public:
break;
case PSM_PSMZ16:
case PSM_PSMZ16S:
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
z = addr.gather32_32(m_vm16);
#else
z = GSVector4i(

View File

@@ -608,6 +608,11 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
FlushWrite();
m_mem.WriteCLUT(r->TEX0, m_env.TEXCLUT);
// For PSMT8 / PSMT4 textures an odd TBW is rounded down to the next even
// value by clearing bit 0.
// NOTE(review): presumably this keeps the 8-bit and 4-bit address helpers'
// ASSERT((bw & 1) == 0) satisfied - confirm against GSLocalMemory.
if((m_env.CTXT[i].TEX0.TBW & 1) && (m_env.CTXT[i].TEX0.PSM == PSM_PSMT8 || m_env.CTXT[i].TEX0.PSM == PSM_PSMT4))
{
m_env.CTXT[i].TEX0.TBW &= ~1;
}
}
template<int i> void GSState::GIFRegHandlerCLAMP(GIFReg* r)
@@ -923,6 +928,16 @@ void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
}
m_env.BITBLTBUF = r->BITBLTBUF;
// For PSMT8 / PSMT4 transfers an odd source or destination buffer width is
// rounded down to the next even value by clearing bit 0.
// NOTE(review): presumably this keeps the 8-bit and 4-bit address helpers'
// ASSERT((bw & 1) == 0) satisfied - confirm against GSLocalMemory.
if((m_env.BITBLTBUF.SBW & 1) && (m_env.BITBLTBUF.SPSM == PSM_PSMT8 || m_env.BITBLTBUF.SPSM == PSM_PSMT4))
{
m_env.BITBLTBUF.SBW &= ~1;
}
if((m_env.BITBLTBUF.DBW & 1) && (m_env.BITBLTBUF.DPSM == PSM_PSMT8 || m_env.BITBLTBUF.DPSM == PSM_PSMT4))
{
m_env.BITBLTBUF.DBW &= ~1; // namcoXcapcom: 5, 11, referred to as 4, 10 in TEX0.TBW later
}
}
void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
@@ -1059,24 +1074,14 @@ void GSState::FlushWrite(BYTE* mem, int len)
void GSState::Write(BYTE* mem, int len)
{
/**/
/*
TRACE(_T("Write len=%d DBP=%05x DBW=%d DPSM=%d DSAX=%d DSAY=%d RRW=%d RRH=%d\n"),
len, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM,
(int)m_env.TRXPOS.DSAX, (int)m_env.TRXPOS.DSAY,
(int)m_env.TRXREG.RRW, (int)m_env.TRXREG.RRH);
*/
if(len == 0) return;
if(m_game.title == CRC::NamcoXCapcom)
{
if(m_env.BITBLTBUF.DBP == 0x03018 && m_env.BITBLTBUF.DBW == 11 && m_env.BITBLTBUF.DPSM == PSM_PSMT8
|| m_env.BITBLTBUF.DBP == 0x03b80 && m_env.BITBLTBUF.DBW == 5 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{
m_env.BITBLTBUF.DBW--; // WTF
}
}
if(m_y >= m_env.TRXREG.RRH) return; // TODO: handle overflow during writing data too (just chop len below somewhere)
// TODO: hmmmm

View File

@@ -145,7 +145,7 @@ public:
UINT32 rgba32() const
{
__m128i r = m;
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
r = _mm_packus_epi32(r, r);
#else
r = _mm_packs_epi32(r, r); // good enough for colors...
@@ -157,7 +157,7 @@ public:
UINT64 rgba64() const
{
__m128i r = m;
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
r = _mm_packus_epi32(r, r);
#else
r = _mm_packs_epi32(r, r); // good enough for colors...
@@ -169,7 +169,7 @@ public:
#endif
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const
{
return GSVector4i(_mm_min_epi8(_mm_max_epi8(m, a), b));
@@ -181,7 +181,7 @@ public:
return GSVector4i(_mm_min_epi16(_mm_max_epi16(m, a), b));
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i sat_i32(const GSVector4i& a, const GSVector4i& b) const
{
return GSVector4i(_mm_min_epi32(_mm_max_epi32(m, a), b));
@@ -193,14 +193,14 @@ public:
return GSVector4i(_mm_min_epu8(_mm_max_epu8(m, a), b));
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
// Clamps each unsigned 16-bit lane to the range [a, b]: lane-wise max with a,
// then lane-wise min with b. _mm_max_epu16 / _mm_min_epu16 are SSE4.1
// intrinsics.
GSVector4i sat_u16(const GSVector4i& a, const GSVector4i& b) const
{
return GSVector4i(_mm_min_epu16(_mm_max_epu16(m, a), b));
}
#endif
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i sat_u32(const GSVector4i& a, const GSVector4i& b) const
{
return GSVector4i(_mm_min_epu32(_mm_max_epu32(m, a), b));
@@ -212,7 +212,7 @@ public:
return GSVector4i(_mm_blendv_epi8(m, a, mask));
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
template<int mask> GSVector4i blend16(const GSVector4i& a) const
{
return GSVector4i(_mm_blend_epi16(m, a, mask));
@@ -246,7 +246,7 @@ public:
return GSVector4i(_mm_packs_epi32(m, a));
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
GSVector4i pu32(const GSVector4i& a) const
{
return GSVector4i(_mm_packus_epi32(m, a));
@@ -403,6 +403,58 @@ public:
return GSVector4i(_mm_srli_epi64(m, i));
}
// Lane-wise addition of sixteen 8-bit integers (wraps on overflow, no
// saturation).
GSVector4i add8(const GSVector4i& v) const
{
return GSVector4i(_mm_add_epi8(m, v.m));
}
// Lane-wise addition of eight 16-bit integers (wraps on overflow, no
// saturation).
GSVector4i add16(const GSVector4i& v) const
{
return GSVector4i(_mm_add_epi16(m, v.m));
}
// Lane-wise addition of four 32-bit integers (wraps on overflow, no
// saturation).
GSVector4i add32(const GSVector4i& v) const
{
return GSVector4i(_mm_add_epi32(m, v.m));
}
// Lane-wise subtraction (this - v) of sixteen 8-bit integers (wraps, no
// saturation).
GSVector4i sub8(const GSVector4i& v) const
{
return GSVector4i(_mm_sub_epi8(m, v.m));
}
// Lane-wise subtraction (this - v) of eight 16-bit integers (wraps, no
// saturation).
GSVector4i sub16(const GSVector4i& v) const
{
return GSVector4i(_mm_sub_epi16(m, v.m));
}
// Lane-wise subtraction (this - v) of four 32-bit integers (wraps, no
// saturation).
GSVector4i sub32(const GSVector4i& v) const
{
return GSVector4i(_mm_sub_epi32(m, v.m));
}
// Multiplies eight signed 16-bit lanes and keeps the high 16 bits of each
// 32-bit product.
GSVector4i mul16hs(const GSVector4i& v) const
{
return GSVector4i(_mm_mulhi_epi16(m, v.m));
}
// Multiplies eight unsigned 16-bit lanes and keeps the high 16 bits of each
// 32-bit product.
GSVector4i mul16hu(const GSVector4i& v) const
{
return GSVector4i(_mm_mulhi_epu16(m, v.m));
}
// Multiplies eight 16-bit lanes and keeps the low 16 bits of each product.
GSVector4i mul16l(const GSVector4i& v) const
{
return GSVector4i(_mm_mullo_epi16(m, v.m));
}
// Only built when targeting SSSE3 or later, because _mm_mulhrs_epi16 is an
// SSSE3 intrinsic.
#if _M_SSE >= 0x301
// Multiplies eight signed 16-bit lanes and returns the rounded, scaled high
// part of each product: ((a * b >> 14) + 1) >> 1 per lane.
GSVector4i mul16hrs(const GSVector4i& v) const
{
return GSVector4i(_mm_mulhrs_epi16(m, v.m));
}
#endif
GSVector4i andnot(const GSVector4i& v) const
{
return GSVector4i(_mm_andnot_si128(v.m, m));
@@ -413,7 +465,17 @@ public:
return _mm_movemask_epi8(m);
}
#if _M_SSE >= 0x400
// Returns a copy of this vector with 16-bit lane i (compile-time index)
// replaced by the low 16 bits of a. _mm_insert_epi16 is an SSE2 intrinsic.
template<int i> GSVector4i insert16(int a) const
{
return GSVector4i(_mm_insert_epi16(m, a, i));
}
// Returns 16-bit lane i (compile-time index), zero-extended to int.
// _mm_extract_epi16 is an SSE2 intrinsic.
template<int i> int extract16() const
{
return _mm_extract_epi16(m, i);
}
#if _M_SSE >= 0x401
template<int i> GSVector4i insert8(int a) const
{
@@ -425,16 +487,6 @@ public:
return _mm_extract_epi8(m, i);
}
// Returns a copy of this vector with 16-bit lane i (compile-time index)
// replaced by the low 16 bits of a.
// NOTE(review): an identical insert16 appears earlier in this hunk; this copy
// looks like the removed side of the diff - confirm only one remains.
template<int i> GSVector4i insert16(int a) const
{
return GSVector4i(_mm_insert_epi16(m, a, i));
}
// Returns 16-bit lane i (compile-time index), zero-extended to int.
// NOTE(review): an identical extract16 appears earlier in this hunk; this copy
// looks like the removed side of the diff - confirm only one remains.
template<int i> int extract16() const
{
return _mm_extract_epi16(m, i);
}
template<int i> GSVector4i insert32(int a) const
{
return GSVector4i(_mm_insert_epi32(m, a, i));
@@ -862,7 +914,7 @@ public:
return GSVector4i(0) == GSVector4i(0);
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
static GSVector4i loadnt(const void* p)
{
return GSVector4i(_mm_stream_load_si128((__m128i*)p));
@@ -919,6 +971,11 @@ public:
}
#endif
// Writes the 128-bit vector v to p with a non-temporal (streaming) store.
// _mm_stream_si128 requires p to be 16-byte aligned.
static void storent(void* p, const GSVector4i& v)
{
_mm_stream_si128((__m128i*)p, v.m);
}
static void storel(void* p, const GSVector4i& v)
{
_mm_storel_epi64((__m128i*)p, v.m);
@@ -940,7 +997,7 @@ public:
GSVector4i::storeh(ph, v);
}
template<bool aligned> static void store(const void* p, const GSVector4i& v)
template<bool aligned> static void store(void* p, const GSVector4i& v)
{
if(aligned) _mm_store_si128((__m128i*)p, v.m);
else _mm_storeu_si128((__m128i*)p, v.m);
@@ -1186,6 +1243,11 @@ public:
return (v1 < v2) | (v1 == v2);
}
// Broadcasts 32-bit lane i (compile-time index) to all four lanes.
template<int i> GSVector4i shuffle() const
{
return GSVector4i(_mm_shuffle_epi32(m, _MM_SHUFFLE(i, i, i, i)));
}
#define VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
GSVector4i xs##ys##zs##ws() const {return GSVector4i(_mm_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
GSVector4i xs##ys##zs##ws##l() const {return GSVector4i(_mm_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
@@ -1392,7 +1454,7 @@ public:
#endif
}
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
template<int i> GSVector4 dp(const GSVector4& v) const
{
return GSVector4(_mm_dp_ps(m, v.m, i));
@@ -1434,6 +1496,16 @@ public:
return GSVector4(_mm_unpackhi_ps(m, a));
}
// Returns (this.x, this.y, a.x, a.y): the low half of a is moved into the
// high half of the result (_mm_movelh_ps).
GSVector4 l2h(const GSVector4& a) const
{
return GSVector4(_mm_movelh_ps(m, a));
}
// Returns (a.z, a.w, this.z, this.w): the high half of a is moved into the
// low half of the result (_mm_movehl_ps).
GSVector4 h2l(const GSVector4& a) const
{
return GSVector4(_mm_movehl_ps(m, a));
}
GSVector4 andnot(const GSVector4& v) const
{
return GSVector4(_mm_andnot_ps(v.m, m));
@@ -1461,8 +1533,26 @@ public:
// Transposes, in place, the 4x4 float matrix whose rows are a, b, c, d.
// NOTE(review): two bodies are visible. The first is the single
// _MM_TRANSPOSE4_PS call followed by a closing brace; the second is the
// shuffle sequence below it. This looks like the removed and added sides of
// a diff rendered together - confirm which is current.
__forceinline static void transpose(GSVector4& a, GSVector4& b, GSVector4& c, GSVector4& d)
{
_MM_TRANSPOSE4_PS(a.m, b.m, c.m, d.m);
}
// Step 1: pair up halves of the rows.
// v0 = (a.x, a.y, b.x, b.y), v1 = (c.x, c.y, d.x, d.y)
// v2 = (a.z, a.w, b.z, b.w), v3 = (c.z, c.w, d.z, d.w)
GSVector4 v0 = a.xyxy(b);
GSVector4 v1 = c.xyxy(d);
GSVector4 v2 = a.zwzw(b);
GSVector4 v3 = c.zwzw(d);
// Step 2: pick even / odd elements to form the columns.
// a = (a.x, b.x, c.x, d.x), b = (a.y, b.y, c.y, d.y), and so on.
a = v0.xzxz(v1);
b = v0.ywyw(v1);
c = v2.xzxz(v3);
d = v2.ywyw(v3);
// Disabled alternative built from unpack (upl / uph) and l2h / h2l moves.
/*
GSVector4 v0 = a.upl(b);
GSVector4 v1 = a.uph(b);
GSVector4 v2 = c.upl(d);
GSVector4 v3 = c.uph(d);
a = v0.l2h(v2);
b = v2.h2l(v0);
c = v1.l2h(v3);
d = v3.h2l(v1);
*/ }
void operator += (const GSVector4& v)
{
@@ -1604,6 +1694,11 @@ public:
return GSVector4(_mm_cmple_ps(v1, v2));
}
// Broadcasts float lane i (compile-time index) to all four lanes.
template<int i> GSVector4 shuffle() const
{
return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(i, i, i, i)));
}
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
GSVector4 xs##ys##zs##ws() const {return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
GSVector4 xs##ys##zs##ws(const GSVector4& v) const {return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \

View File

@@ -105,6 +105,30 @@ BOOL GSdxApp::InitInstance()
return TRUE;
}
// Run-time probe for the instruction set this build targets (_M_SSE).
// Executes one representative intrinsic for the selected level inside an SEH
// __try block. Returns false if any structured exception is raised while
// doing so (the filter is EXCEPTION_EXECUTE_HANDLER, so every exception is
// caught), true otherwise.
// If _M_SSE is undefined or below 0x200 no probe instruction is compiled in
// and the function returns true.
static bool CheckSSE()
{
__try
{
// NOTE(review): m is static, presumably so the probe's result is stored
// somewhere observable and the instruction is not optimized away - confirm.
static __m128i m;
#if _M_SSE >= 0x402
// POPCNT, used here as the probe for the 0x402 (SSE4.2) level.
m.m128i_i32[0] = _mm_popcnt_u32(1234);
#elif _M_SSE >= 0x401
// SSE4.1 probe.
m = _mm_packus_epi32(m, m);
#elif _M_SSE >= 0x301
// SSSE3 probe.
m = _mm_alignr_epi8(m, m, 1);
#elif _M_SSE >= 0x200
// SSE2 probe.
m = _mm_packs_epi32(m, m);
#endif
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
return false;
}
return true;
}
//
#define PS2E_LT_GS 0x01
@@ -137,8 +161,10 @@ EXPORT_C_(char*) PS2EgetLibName()
sl.AddTail(s);
#endif
#if _M_SSE >= 0x400
sl.AddTail(_T("SSE4"));
#if _M_SSE >= 0x402
sl.AddTail(_T("SSE42"));
#elif _M_SSE >= 0x401
sl.AddTail(_T("SSE41"));
#elif _M_SSE >= 0x301
sl.AddTail(_T("SSSE3"));
#elif _M_SSE >= 0x200
@@ -221,6 +247,8 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
{
AFX_MANAGE_STATE(AfxGetStaticModuleState());
//
CString str;
str.Format(_T("d3dx9_%d.dll"), D3DX_SDK_VERSION);
@@ -241,6 +269,18 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
return -1;
}
//
if(!CheckSSE())
{
CString str;
str.Format(_T("This CPU does not support SSE %d.%02d"), _M_SSE >> 8, _M_SSE & 0xff);
AfxMessageBox(str, MB_OK);
return -1;
}
//
GSclose();
// TODO
@@ -555,6 +595,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
//
//for(int tbw = 5; tbw <= 10; tbw++)
for(int tbw = 5; tbw <= 10; tbw++)
{
int n = 256 << ((10 - tbw) * 2);

View File

@@ -788,6 +788,7 @@
</Configuration>
<Configuration
Name="Debug SSE4|Win32"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets="..\common.vsprops;..\debug.vsprops;..\sse4.vsprops"
UseOfMFC="1"

View File

@@ -105,7 +105,7 @@
// sse4
#if _M_SSE >= 0x400
#if _M_SSE >= 0x401
#include <smmintrin.h>

View File

@@ -6,7 +6,7 @@
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions="_M_SSE=0x401"
EnableEnhancedInstructionSet="2"
PreprocessorDefinitions="_M_SSE=0x400"
/>
</VisualStudioPropertySheet>

View File

@@ -788,6 +788,7 @@
</Configuration>
<Configuration
Name="Debug SSE4|Win32"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="2"
InheritedPropertySheets="..\common.vsprops;..\debug.vsprops;..\sse2.vsprops;..\sse4.vsprops"
UseOfMFC="1"