mirror of
https://github.com/PCSX2/gsdx-sourceforge.git
synced 2026-02-04 03:11:19 +01:00
This commit is contained in:
@@ -68,7 +68,7 @@ public:
|
||||
{
|
||||
GSVector4i v4((int)mask);
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
if(mask == 0xff000000 || mask == 0x00ffffff)
|
||||
{
|
||||
@@ -87,7 +87,7 @@ public:
|
||||
((GSVector4i*)dst)[i * 4 + 2] = ((GSVector4i*)dst)[i * 4 + 2].blend(v2, v4);
|
||||
((GSVector4i*)dst)[i * 4 + 3] = ((GSVector4i*)dst)[i * 4 + 3].blend(v3, v4);
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
}
|
||||
|
||||
@@ -1090,7 +1090,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 16; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1112,7 +1112,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 16; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1134,7 +1134,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 16; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1156,7 +1156,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 16; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1178,7 +1178,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 8; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1201,7 +1201,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 8; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1226,7 +1226,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 8; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1249,7 +1249,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 8; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1274,7 +1274,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 8; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1297,7 +1297,7 @@ public:
|
||||
{
|
||||
for(int j = 0; j < 8; j++, dst += dstpitch)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)dst;
|
||||
@@ -1318,8 +1318,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: UnpackAndWrite*
|
||||
|
||||
__forceinline static void UnpackAndWriteBlock24(BYTE* src, int srcpitch, BYTE* dst)
|
||||
{
|
||||
#if _M_SSE >= 0x200
|
||||
@@ -1755,7 +1753,7 @@ public:
|
||||
|
||||
__forceinline static void ReadAndExpandBlock8_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
@@ -1825,7 +1823,7 @@ public:
|
||||
|
||||
__forceinline static void ReadAndExpandBlock4_32(BYTE* src, BYTE* dst, int dstpitch, UINT64* pal)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
@@ -1917,7 +1915,7 @@ public:
|
||||
|
||||
__forceinline static void ReadAndExpandBlock8H_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
@@ -1968,7 +1966,7 @@ public:
|
||||
|
||||
__forceinline static void ReadAndExpandBlock4HL_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
@@ -2019,7 +2017,7 @@ public:
|
||||
|
||||
__forceinline static void ReadAndExpandBlock4HH_32(BYTE* src, BYTE* dst, int dstpitch, DWORD* pal)
|
||||
{
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v0, v1, v2, v3;
|
||||
|
||||
|
||||
@@ -30,11 +30,13 @@
|
||||
#define ASSERT_BLOCK(r, w, h) \
|
||||
ASSERT((r).Width() >= w && (r).Height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \
|
||||
|
||||
#define FOREACH_BLOCK_START(r, w, h, t) \
|
||||
for(int y = (r).top; y < (r).bottom; y += (h)) \
|
||||
{ ASSERT_BLOCK(r, w, h); \
|
||||
BYTE* ptr = dst + (y-(r).top)*dstpitch; \
|
||||
for(int x = (r).left; x < (r).right; x += (w)) \
|
||||
#define FOREACH_BLOCK_START(w, h, bpp) \
|
||||
DWORD bp = TEX0.TBP0; \
|
||||
DWORD bw = TEX0.TBW; \
|
||||
int offset = dstpitch * h - (r.right - r.left) * bpp / 8; \
|
||||
for(int y = r.top; y < r.bottom; y += h, dst += offset) \
|
||||
{ ASSERT_BLOCK(r, w, h); \
|
||||
for(int x = r.left; x < r.right; x += w, dst += w * bpp / 8) \
|
||||
{ \
|
||||
|
||||
#define FOREACH_BLOCK_END }}
|
||||
@@ -813,6 +815,9 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 4;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -832,14 +837,14 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < twa; x += 8)
|
||||
{
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, ty, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel32(x, ty, ((DWORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -857,14 +862,14 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < twa; x += 8)
|
||||
{
|
||||
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32(x, ty & ~7, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel32(x, ty, ((DWORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -882,7 +887,7 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -892,7 +897,7 @@ void GSLocalMemory::WriteImage32(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -905,6 +910,9 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 3;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -924,7 +932,7 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
|
||||
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -936,6 +944,9 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -955,14 +966,14 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -980,14 +991,14 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1005,7 +1016,7 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1015,7 +1026,7 @@ void GSLocalMemory::WriteImage16(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1028,6 +1039,9 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1047,14 +1061,14 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16S(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1072,14 +1086,14 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16S(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16S(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16S(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1097,7 +1111,7 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1107,7 +1121,7 @@ void GSLocalMemory::WriteImage16S(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1120,6 +1134,9 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1139,14 +1156,14 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
|
||||
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, ty, bp, bw)], src + (x - tx), srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 16; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel8(x, ty, src[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1164,14 +1181,14 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteColumn8<false>(ty, (BYTE*)&m_vm8[BlockAddress8(x, ty & ~15, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
|
||||
WriteColumn8<false>(ty, (BYTE*)&m_vm8[BlockAddress8(x, ty & ~15, bp, bw)], src + (x - tx), srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 4; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel8(x, ty, src[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1189,7 +1206,7 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
|
||||
WriteBlock8<false>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], src + (x - tx), srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1199,7 +1216,7 @@ void GSLocalMemory::WriteImage8(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
|
||||
WriteBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], src + (x - tx), srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1212,6 +1229,9 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1231,7 +1251,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < twa; x += 32)
|
||||
{
|
||||
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, ty, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 16; i++, ty++, src += srcpitch)
|
||||
@@ -1240,8 +1260,8 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
|
||||
for(int x = twa; x < tw; x += 2, s++)
|
||||
{
|
||||
WritePixel4(x, ty, *s & 0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
|
||||
WritePixel4(x + 1, ty, *s >> 4, BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel4(x, ty, *s & 0xf, bp, bw),
|
||||
WritePixel4(x + 1, ty, *s >> 4, bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1259,7 +1279,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < twa; x += 32)
|
||||
{
|
||||
WriteColumn4<false>(ty, (BYTE*)&m_vm8[BlockAddress4(x, ty & ~15, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
WriteColumn4<false>(ty, (BYTE*)&m_vm8[BlockAddress4(x, ty & ~15, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 4; i++, ty++, src += srcpitch)
|
||||
@@ -1268,8 +1288,8 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
|
||||
for(int x = twa; x < tw; x += 2, s++)
|
||||
{
|
||||
WritePixel4(x, ty, *s & 0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
|
||||
WritePixel4(x + 1, ty, *s >> 4, BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel4(x, ty, *s & 0xf, bp, bw),
|
||||
WritePixel4(x + 1, ty, *s >> 4, bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1287,7 +1307,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < tw; x += 32)
|
||||
{
|
||||
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
WriteBlock4<false>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1297,7 +1317,7 @@ void GSLocalMemory::WriteImage4(int& tx, int& ty, BYTE* src, int len, GIFRegBITB
|
||||
{
|
||||
for(int x = tx; x < tw; x += 32)
|
||||
{
|
||||
WriteBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
WriteBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], src + (x - tx) / 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1310,6 +1330,9 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1329,7 +1352,7 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBIT
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
UnpackAndWriteBlock8H(src + (x - tx), srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
|
||||
UnpackAndWriteBlock8H(src + (x - tx), srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1341,6 +1364,9 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1360,7 +1386,7 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
|
||||
UnpackAndWriteBlock4HL(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1372,6 +1398,9 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1391,7 +1420,7 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
|
||||
UnpackAndWriteBlock4HH(src + (x - tx) / 2, srcpitch, (BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1403,6 +1432,9 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 4;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1422,14 +1454,14 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < twa; x += 8)
|
||||
{
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, ty, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1447,14 +1479,14 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < twa; x += 8)
|
||||
{
|
||||
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32Z(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteColumn32<false, 0xffffffff>(ty, (BYTE*)&m_vm32[BlockAddress32Z(x, ty & ~7, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel32Z(x, ty, ((DWORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1472,7 +1504,7 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteBlock32<false, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1482,7 +1514,7 @@ void GSLocalMemory::WriteImage32Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 4, srcpitch);
|
||||
WriteBlock32<true, 0xffffffff>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], src + (x - tx) * 4, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1495,6 +1527,9 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 3;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1514,7 +1549,7 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 8)
|
||||
{
|
||||
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)]);
|
||||
UnpackAndWriteBlock24(src + (x - tx) * 3, srcpitch, (BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1526,6 +1561,9 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1545,14 +1583,14 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1570,14 +1608,14 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16Z(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16Z(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16Z(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1595,7 +1633,7 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1605,7 +1643,7 @@ void GSLocalMemory::WriteImage16Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1618,6 +1656,9 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
|
||||
{
|
||||
if(TRXREG.RRW == 0) return;
|
||||
|
||||
DWORD bp = BITBLTBUF.DBP;
|
||||
DWORD bw = BITBLTBUF.DBW;
|
||||
|
||||
int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 2;
|
||||
int th = len / srcpitch;
|
||||
|
||||
@@ -1637,14 +1678,14 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, ty, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 8; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1662,14 +1703,14 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
|
||||
{
|
||||
for(int x = tx; x < twa; x += 16)
|
||||
{
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16SZ(x, ty & ~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteColumn16<false>(ty, (BYTE*)&m_vm16[BlockAddress16SZ(x, ty & ~7, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
|
||||
for(int i = 0; i < 2; i++, ty++, src += srcpitch)
|
||||
{
|
||||
for(int x = twa; x < tw; x++)
|
||||
{
|
||||
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
|
||||
WritePixel16SZ(x, ty, ((WORD*)src)[x - tx], bp, bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1687,7 +1728,7 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<false>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1697,7 +1738,7 @@ void GSLocalMemory::WriteImage16SZ(int& tx, int& ty, BYTE* src, int len, GIFRegB
|
||||
{
|
||||
for(int x = tx; x < tw; x += 16)
|
||||
{
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx) * 2, srcpitch);
|
||||
WriteBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], src + (x - tx) * 2, srcpitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2065,28 +2106,29 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBL
|
||||
|
||||
void GSLocalMemory::ReadTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch);
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
else
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2096,11 +2138,11 @@ void GSLocalMemory::ReadTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRe
|
||||
{
|
||||
__declspec(align(16)) WORD block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16)
|
||||
FOREACH_BLOCK_START(16, 8, 32)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2109,11 +2151,11 @@ void GSLocalMemory::ReadTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
{
|
||||
__declspec(align(16)) WORD block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16S)
|
||||
FOREACH_BLOCK_START(16, 8, 32)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2122,9 +2164,9 @@ void GSLocalMemory::ReadTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFReg
|
||||
{
|
||||
DWORD* pal = m_clut32;
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 16, 8)
|
||||
FOREACH_BLOCK_START(16, 16, 32)
|
||||
{
|
||||
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2133,9 +2175,9 @@ void GSLocalMemory::ReadTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFReg
|
||||
{
|
||||
UINT64* pal = m_clut64;
|
||||
|
||||
FOREACH_BLOCK_START(r, 32, 16, 4)
|
||||
FOREACH_BLOCK_START(32, 16, 32)
|
||||
{
|
||||
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, TEX0.TBP0, TEX0.TBW) >> 1], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2144,9 +2186,9 @@ void GSLocalMemory::ReadTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRe
|
||||
{
|
||||
DWORD* pal = m_clut32;
|
||||
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2155,9 +2197,9 @@ void GSLocalMemory::ReadTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
{
|
||||
DWORD* pal = m_clut32;
|
||||
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2166,18 +2208,18 @@ void GSLocalMemory::ReadTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
{
|
||||
DWORD* pal = m_clut32;
|
||||
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture32Z(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch);
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2186,17 +2228,17 @@ void GSLocalMemory::ReadTexture24Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
{
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ReadAndExpandBlock24<true>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
else
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ReadAndExpandBlock24<false>((BYTE*)&m_vm32[BlockAddress32Z(x, y, bp, bw)], dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2206,11 +2248,11 @@ void GSLocalMemory::ReadTexture16Z(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
{
|
||||
__declspec(align(16)) WORD block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16)
|
||||
FOREACH_BLOCK_START(16, 8, 32)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2219,11 +2261,11 @@ void GSLocalMemory::ReadTexture16SZ(const CRect& r, BYTE* dst, int dstpitch, GIF
|
||||
{
|
||||
__declspec(align(16)) WORD block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16S)
|
||||
FOREACH_BLOCK_START(16, 8, 32)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, ptr + (x - r.left) * 4, dstpitch, TEXA);
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2270,18 +2312,18 @@ void GSLocalMemory::ReadTextureNC(const CRect& r, BYTE* dst, int dstpitch, GIFRe
|
||||
|
||||
void GSLocalMemory::ReadTexture16NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16)
|
||||
FOREACH_BLOCK_START(16, 8, 16)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture16SNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16S)
|
||||
FOREACH_BLOCK_START(16, 8, 16)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16S(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2292,9 +2334,9 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
|
||||
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 16, 16, 8)
|
||||
FOREACH_BLOCK_START(16, 16, 32)
|
||||
{
|
||||
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock8_32((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2304,11 +2346,11 @@ void GSLocalMemory::ReadTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
|
||||
__declspec(align(16)) BYTE block[16 * 16];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 16, 8)
|
||||
FOREACH_BLOCK_START(16, 16, 16)
|
||||
{
|
||||
ReadBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 16);
|
||||
ReadBlock8<true>((BYTE*)&m_vm8[BlockAddress8(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 16);
|
||||
|
||||
ExpandBlock8_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
|
||||
ExpandBlock8_16(block, dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2320,9 +2362,9 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
|
||||
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 32, 16, 4)
|
||||
FOREACH_BLOCK_START(32, 16, 32)
|
||||
{
|
||||
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, TEX0.TBP0, TEX0.TBW) >> 1], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock4_32((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw) >> 1], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2332,11 +2374,11 @@ void GSLocalMemory::ReadTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFR
|
||||
|
||||
__declspec(align(16)) BYTE block[(32 / 2) * 16];
|
||||
|
||||
FOREACH_BLOCK_START(r, 32, 16, 4)
|
||||
FOREACH_BLOCK_START(32, 16, 16)
|
||||
{
|
||||
ReadBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], (BYTE*)block, sizeof(block) / 16);
|
||||
ReadBlock4<true>((BYTE*)&m_vm8[BlockAddress4(x, y, bp, bw)>>1], (BYTE*)block, sizeof(block) / 16);
|
||||
|
||||
ExpandBlock4_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
|
||||
ExpandBlock4_16(block, dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2348,9 +2390,9 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIF
|
||||
|
||||
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock8H_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2360,11 +2402,11 @@ void GSLocalMemory::ReadTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIF
|
||||
|
||||
__declspec(align(16)) DWORD block[8 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 16)
|
||||
{
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock8H_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
|
||||
ExpandBlock8H_16(block, dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2376,9 +2418,9 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GI
|
||||
|
||||
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock4HL_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2388,11 +2430,11 @@ void GSLocalMemory::ReadTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GI
|
||||
|
||||
__declspec(align(16)) DWORD block[8 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 16)
|
||||
{
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock4HL_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
|
||||
ExpandBlock4HL_16(block, dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2404,9 +2446,9 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
|
||||
|
||||
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 4, dstpitch, pal);
|
||||
ReadAndExpandBlock4HH_32((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2416,11 +2458,11 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
|
||||
|
||||
__declspec(align(16)) DWORD block[8 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
FOREACH_BLOCK_START(8, 8, 16)
|
||||
{
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block) / 8);
|
||||
ReadBlock32<true>((BYTE*)&m_vm32[BlockAddress32(x, y, bp, bw)], (BYTE*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock4HH_16(block, ptr + (x - r.left) * 2, dstpitch, pal);
|
||||
ExpandBlock4HH_16(block, dst, dstpitch, pal);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
@@ -2428,18 +2470,18 @@ void GSLocalMemory::ReadTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GI
|
||||
|
||||
void GSLocalMemory::ReadTexture16ZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16)
|
||||
FOREACH_BLOCK_START(16, 8, 16)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16Z(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture16SZNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 16, 8, 16S)
|
||||
FOREACH_BLOCK_START(16, 8, 16)
|
||||
{
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x - r.left) * 2, dstpitch);
|
||||
ReadBlock16<true>((BYTE*)&m_vm16[BlockAddress16SZ(x, y, bp, bw)], dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
@@ -130,12 +130,14 @@ public:
|
||||
|
||||
static DWORD PageAddress8(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
return ((bp >> 5) + (y >> 6) * ((bw + 1) >> 1) + (x >> 7)) << 13;
|
||||
ASSERT((bw & 1) == 0);
|
||||
return ((bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7)) << 13;
|
||||
}
|
||||
|
||||
static DWORD PageAddress4(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
return ((bp >> 5) + (y >> 7) * ((bw + 1) >> 1) + (x >> 7)) << 14;
|
||||
ASSERT((bw & 1) == 0);
|
||||
return ((bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7)) << 14;
|
||||
}
|
||||
|
||||
static DWORD BlockAddress32(int x, int y, DWORD bp, DWORD bw)
|
||||
@@ -161,14 +163,16 @@ public:
|
||||
|
||||
static DWORD BlockAddress8(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f);
|
||||
ASSERT((bw & 1) == 0);
|
||||
DWORD page = bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
|
||||
DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
|
||||
return (page + block) << 8;
|
||||
}
|
||||
|
||||
static DWORD BlockAddress4(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f);
|
||||
ASSERT((bw & 1) == 0);
|
||||
DWORD page = bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
|
||||
DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
|
||||
return (page + block) << 9;
|
||||
}
|
||||
@@ -223,16 +227,18 @@ public:
|
||||
|
||||
static DWORD PixelAddressOrg8(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f);
|
||||
ASSERT((bw & 1) == 0);
|
||||
DWORD page = bp + ((y >> 1) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
|
||||
DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
|
||||
DWORD word = ((page + block) << 8) + columnTable8[y & 15][x & 15];
|
||||
// ASSERT(word < 1024*1024*4);
|
||||
ASSERT(word < 1024*1024*4);
|
||||
return word;
|
||||
}
|
||||
|
||||
static DWORD PixelAddressOrg4(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f);
|
||||
ASSERT((bw & 1) == 0);
|
||||
DWORD page = bp + ((y >> 2) & ~0x1f) * (bw >> 1) + ((x >> 2) & ~0x1f);
|
||||
DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
|
||||
DWORD word = ((page + block) << 9) + columnTable4[y & 15][x & 31];
|
||||
ASSERT(word < 1024*1024*8);
|
||||
@@ -289,14 +295,16 @@ public:
|
||||
|
||||
static __forceinline DWORD PixelAddress8(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
DWORD page = (bp >> 5) + (y >> 6) * ((bw + 1)>>1) + (x >> 7);
|
||||
ASSERT((bw & 1) == 0);
|
||||
DWORD page = (bp >> 5) + (y >> 6) * (bw >> 1) + (x >> 7);
|
||||
DWORD word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f];
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline DWORD PixelAddress4(int x, int y, DWORD bp, DWORD bw)
|
||||
{
|
||||
DWORD page = (bp >> 5) + (y >> 7) * ((bw + 1)>>1) + (x >> 7);
|
||||
ASSERT((bw & 1) == 0);
|
||||
DWORD page = (bp >> 5) + (y >> 7) * (bw >> 1) + (x >> 7);
|
||||
DWORD word = (page << 14) + pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f];
|
||||
return word;
|
||||
}
|
||||
@@ -880,7 +888,7 @@ public:
|
||||
{
|
||||
case PSM_PSMCT32:
|
||||
case PSM_PSMZ32:
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
c = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
c = GSVector4i(
|
||||
@@ -892,7 +900,7 @@ public:
|
||||
break;
|
||||
case PSM_PSMCT24:
|
||||
case PSM_PSMZ24:
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
c = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
c = GSVector4i(
|
||||
@@ -907,7 +915,7 @@ public:
|
||||
case PSM_PSMCT16S:
|
||||
case PSM_PSMZ16:
|
||||
case PSM_PSMZ16S:
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
c = addr.gather32_32(m_vm16);
|
||||
#else
|
||||
c = GSVector4i(
|
||||
@@ -933,7 +941,7 @@ public:
|
||||
switch(PSM)
|
||||
{
|
||||
case PSM_PSMZ32:
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
z = addr.gather32_32(m_vm32);
|
||||
#else
|
||||
z = GSVector4i(
|
||||
@@ -944,7 +952,7 @@ public:
|
||||
#endif
|
||||
break;
|
||||
case PSM_PSMZ24:
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
z = addr.gather32_32(m_vm32) & 0x00ffffff;
|
||||
#else
|
||||
z = GSVector4i(
|
||||
@@ -957,7 +965,7 @@ public:
|
||||
break;
|
||||
case PSM_PSMZ16:
|
||||
case PSM_PSMZ16S:
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
z = addr.gather32_32(m_vm16);
|
||||
#else
|
||||
z = GSVector4i(
|
||||
|
||||
@@ -608,6 +608,11 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
|
||||
FlushWrite();
|
||||
|
||||
m_mem.WriteCLUT(r->TEX0, m_env.TEXCLUT);
|
||||
|
||||
if((m_env.CTXT[i].TEX0.TBW & 1) && (m_env.CTXT[i].TEX0.PSM == PSM_PSMT8 || m_env.CTXT[i].TEX0.PSM == PSM_PSMT4))
|
||||
{
|
||||
m_env.CTXT[i].TEX0.TBW &= ~1;
|
||||
}
|
||||
}
|
||||
|
||||
template<int i> void GSState::GIFRegHandlerCLAMP(GIFReg* r)
|
||||
@@ -923,6 +928,16 @@ void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
|
||||
}
|
||||
|
||||
m_env.BITBLTBUF = r->BITBLTBUF;
|
||||
|
||||
if((m_env.BITBLTBUF.SBW & 1) && (m_env.BITBLTBUF.SPSM == PSM_PSMT8 || m_env.BITBLTBUF.SPSM == PSM_PSMT4))
|
||||
{
|
||||
m_env.BITBLTBUF.SBW &= ~1;
|
||||
}
|
||||
|
||||
// For 8-bit and 4-bit destination formats, clear bit 0 of DBW so the destination buffer width is even.
if((m_env.BITBLTBUF.DBW & 1) && (m_env.BITBLTBUF.DPSM == PSM_PSMT8 || m_env.BITBLTBUF.DPSM == PSM_PSMT4))
|
||||
{
|
||||
m_env.BITBLTBUF.DBW &= ~1; // namcoXcapcom: 5, 11, referred to as 4, 10 in TEX0.TBW later
|
||||
}
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
|
||||
@@ -1059,24 +1074,14 @@ void GSState::FlushWrite(BYTE* mem, int len)
|
||||
|
||||
void GSState::Write(BYTE* mem, int len)
|
||||
{
|
||||
/**/
|
||||
/*
|
||||
TRACE(_T("Write len=%d DBP=%05x DBW=%d DPSM=%d DSAX=%d DSAY=%d RRW=%d RRH=%d\n"),
|
||||
len, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM,
|
||||
(int)m_env.TRXPOS.DSAX, (int)m_env.TRXPOS.DSAY,
|
||||
(int)m_env.TRXREG.RRW, (int)m_env.TRXREG.RRH);
|
||||
|
||||
*/
|
||||
if(len == 0) return;
|
||||
|
||||
if(m_game.title == CRC::NamcoXCapcom)
|
||||
{
|
||||
|
||||
if(m_env.BITBLTBUF.DBP == 0x03018 && m_env.BITBLTBUF.DBW == 11 && m_env.BITBLTBUF.DPSM == PSM_PSMT8
|
||||
|| m_env.BITBLTBUF.DBP == 0x03b80 && m_env.BITBLTBUF.DBW == 5 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
||||
{
|
||||
m_env.BITBLTBUF.DBW--; // WTF
|
||||
}
|
||||
}
|
||||
|
||||
if(m_y >= m_env.TRXREG.RRH) return; // TODO: handle overflow during writing data too (just chop len below somewhere)
|
||||
|
||||
// TODO: hmmmm
|
||||
|
||||
143
gsdx/GSVector.h
143
gsdx/GSVector.h
@@ -145,7 +145,7 @@ public:
|
||||
UINT32 rgba32() const
|
||||
{
|
||||
__m128i r = m;
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
r = _mm_packus_epi32(r, r);
|
||||
#else
|
||||
r = _mm_packs_epi32(r, r); // good enough for colors...
|
||||
@@ -157,7 +157,7 @@ public:
|
||||
UINT64 rgba64() const
|
||||
{
|
||||
__m128i r = m;
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
r = _mm_packus_epi32(r, r);
|
||||
#else
|
||||
r = _mm_packs_epi32(r, r); // good enough for colors...
|
||||
@@ -169,7 +169,7 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const
|
||||
{
|
||||
return GSVector4i(_mm_min_epi8(_mm_max_epi8(m, a), b));
|
||||
@@ -181,7 +181,7 @@ public:
|
||||
return GSVector4i(_mm_min_epi16(_mm_max_epi16(m, a), b));
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
GSVector4i sat_i32(const GSVector4i& a, const GSVector4i& b) const
|
||||
{
|
||||
return GSVector4i(_mm_min_epi32(_mm_max_epi32(m, a), b));
|
||||
@@ -193,14 +193,14 @@ public:
|
||||
return GSVector4i(_mm_min_epu8(_mm_max_epu8(m, a), b));
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
GSVector4i sat_u16(const GSVector4i& a, const GSVector4i& b) const
|
||||
{
|
||||
return GSVector4i(_mm_min_epu16(_mm_max_epu16(m, a), b));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
GSVector4i sat_u32(const GSVector4i& a, const GSVector4i& b) const
|
||||
{
|
||||
return GSVector4i(_mm_min_epu32(_mm_max_epu32(m, a), b));
|
||||
@@ -212,7 +212,7 @@ public:
|
||||
return GSVector4i(_mm_blendv_epi8(m, a, mask));
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
template<int mask> GSVector4i blend16(const GSVector4i& a) const
|
||||
{
|
||||
return GSVector4i(_mm_blend_epi16(m, a, mask));
|
||||
@@ -246,7 +246,7 @@ public:
|
||||
return GSVector4i(_mm_packs_epi32(m, a));
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
GSVector4i pu32(const GSVector4i& a) const
|
||||
{
|
||||
return GSVector4i(_mm_packus_epi32(m, a));
|
||||
@@ -403,6 +403,58 @@ public:
|
||||
return GSVector4i(_mm_srli_epi64(m, i));
|
||||
}
|
||||
|
||||
// Element-wise add of the sixteen packed 8-bit integers of this vector and v (_mm_add_epi8, no saturation).
GSVector4i add8(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_add_epi8(m, v.m));
|
||||
}
|
||||
|
||||
// Element-wise add of the eight packed 16-bit integers of this vector and v (_mm_add_epi16, no saturation).
GSVector4i add16(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_add_epi16(m, v.m));
|
||||
}
|
||||
|
||||
// Element-wise add of the four packed 32-bit integers of this vector and v (_mm_add_epi32).
GSVector4i add32(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_add_epi32(m, v.m));
|
||||
}
|
||||
|
||||
// Element-wise subtract (this - v) of the sixteen packed 8-bit integers (_mm_sub_epi8, no saturation).
GSVector4i sub8(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_sub_epi8(m, v.m));
|
||||
}
|
||||
|
||||
// Element-wise subtract (this - v) of the eight packed 16-bit integers (_mm_sub_epi16, no saturation).
GSVector4i sub16(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_sub_epi16(m, v.m));
|
||||
}
|
||||
|
||||
// Element-wise subtract (this - v) of the four packed 32-bit integers (_mm_sub_epi32).
GSVector4i sub32(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_sub_epi32(m, v.m));
|
||||
}
|
||||
|
||||
// Signed 16-bit multiply, keeping the HIGH 16 bits of each 32-bit product (_mm_mulhi_epi16).
GSVector4i mul16hs(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_mulhi_epi16(m, v.m));
|
||||
}
|
||||
|
||||
// Unsigned 16-bit multiply, keeping the HIGH 16 bits of each 32-bit product (_mm_mulhi_epu16).
GSVector4i mul16hu(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_mulhi_epu16(m, v.m));
|
||||
}
|
||||
|
||||
// 16-bit multiply, keeping the LOW 16 bits of each 32-bit product (_mm_mullo_epi16).
GSVector4i mul16l(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_mullo_epi16(m, v.m));
|
||||
}
|
||||
|
||||
// Only compiled when the build targets _M_SSE >= 0x301, since _mm_mulhrs_epi16 is an SSSE3 intrinsic.
#if _M_SSE >= 0x301
|
||||
// Signed 16-bit multiply with round-and-scale of each product (_mm_mulhrs_epi16).
GSVector4i mul16hrs(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_mulhrs_epi16(m, v.m));
|
||||
}
|
||||
#endif
|
||||
|
||||
GSVector4i andnot(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_andnot_si128(v.m, m));
|
||||
@@ -413,7 +465,17 @@ public:
|
||||
return _mm_movemask_epi8(m);
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
// Returns a copy of this vector with 16-bit element i replaced by the low 16 bits of a (_mm_insert_epi16).
// The index is a template argument because the intrinsic takes it as a compile-time immediate.
template<int i> GSVector4i insert16(int a) const
|
||||
{
|
||||
return GSVector4i(_mm_insert_epi16(m, a, i));
|
||||
}
|
||||
|
||||
// Returns 16-bit element i of this vector as an int (_mm_extract_epi16).
// The index is a template argument because the intrinsic takes it as a compile-time immediate.
template<int i> int extract16() const
|
||||
{
|
||||
return _mm_extract_epi16(m, i);
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
template<int i> GSVector4i insert8(int a) const
|
||||
{
|
||||
@@ -425,16 +487,6 @@ public:
|
||||
return _mm_extract_epi8(m, i);
|
||||
}
|
||||
|
||||
template<int i> GSVector4i insert16(int a) const
|
||||
{
|
||||
return GSVector4i(_mm_insert_epi16(m, a, i));
|
||||
}
|
||||
|
||||
template<int i> int extract16() const
|
||||
{
|
||||
return _mm_extract_epi16(m, i);
|
||||
}
|
||||
|
||||
template<int i> GSVector4i insert32(int a) const
|
||||
{
|
||||
return GSVector4i(_mm_insert_epi32(m, a, i));
|
||||
@@ -862,7 +914,7 @@ public:
|
||||
return GSVector4i(0) == GSVector4i(0);
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
static GSVector4i loadnt(const void* p)
|
||||
{
|
||||
return GSVector4i(_mm_stream_load_si128((__m128i*)p));
|
||||
@@ -919,6 +971,11 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
// Writes the 128 bits of v to p with a non-temporal (streaming) store via _mm_stream_si128.
// NOTE(review): the intrinsic requires p to be 16-byte aligned - confirm at the call sites.
static void storent(void* p, const GSVector4i& v)
|
||||
{
|
||||
_mm_stream_si128((__m128i*)p, v.m);
|
||||
}
|
||||
|
||||
static void storel(void* p, const GSVector4i& v)
|
||||
{
|
||||
_mm_storel_epi64((__m128i*)p, v.m);
|
||||
@@ -940,7 +997,7 @@ public:
|
||||
GSVector4i::storeh(ph, v);
|
||||
}
|
||||
|
||||
template<bool aligned> static void store(const void* p, const GSVector4i& v)
|
||||
template<bool aligned> static void store(void* p, const GSVector4i& v)
|
||||
{
|
||||
if(aligned) _mm_store_si128((__m128i*)p, v.m);
|
||||
else _mm_storeu_si128((__m128i*)p, v.m);
|
||||
@@ -1186,6 +1243,11 @@ public:
|
||||
return (v1 < v2) | (v1 == v2);
|
||||
}
|
||||
|
||||
// Broadcast: returns a vector whose four 32-bit elements are all copies of element i of this vector.
// i feeds _MM_SHUFFLE, so it is presumably expected to be in 0..3 - TODO confirm.
template<int i> GSVector4i shuffle() const
|
||||
{
|
||||
return GSVector4i(_mm_shuffle_epi32(m, _MM_SHUFFLE(i, i, i, i)));
|
||||
}
|
||||
|
||||
#define VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
|
||||
GSVector4i xs##ys##zs##ws() const {return GSVector4i(_mm_shuffle_epi32(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||
GSVector4i xs##ys##zs##ws##l() const {return GSVector4i(_mm_shufflelo_epi16(m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||
@@ -1392,7 +1454,7 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
template<int i> GSVector4 dp(const GSVector4& v) const
|
||||
{
|
||||
return GSVector4(_mm_dp_ps(m, v.m, i));
|
||||
@@ -1434,6 +1496,16 @@ public:
|
||||
return GSVector4(_mm_unpackhi_ps(m, a));
|
||||
}
|
||||
|
||||
// "Low to high" (_mm_movelh_ps): the result's lower two floats are this vector's lower two,
// and its upper two floats are a's lower two.
GSVector4 l2h(const GSVector4& a) const
|
||||
{
|
||||
return GSVector4(_mm_movelh_ps(m, a));
|
||||
}
|
||||
|
||||
// "High to low" (_mm_movehl_ps): the result's lower two floats are a's upper two,
// and its upper two floats are this vector's upper two.
GSVector4 h2l(const GSVector4& a) const
|
||||
{
|
||||
return GSVector4(_mm_movehl_ps(m, a));
|
||||
}
|
||||
|
||||
GSVector4 andnot(const GSVector4& v) const
|
||||
{
|
||||
return GSVector4(_mm_andnot_ps(v.m, m));
|
||||
@@ -1461,8 +1533,26 @@ public:
|
||||
|
||||
__forceinline static void transpose(GSVector4& a, GSVector4& b, GSVector4& c, GSVector4& d)
|
||||
{
|
||||
_MM_TRANSPOSE4_PS(a.m, b.m, c.m, d.m);
|
||||
}
|
||||
GSVector4 v0 = a.xyxy(b);
|
||||
GSVector4 v1 = c.xyxy(d);
|
||||
GSVector4 v2 = a.zwzw(b);
|
||||
GSVector4 v3 = c.zwzw(d);
|
||||
|
||||
a = v0.xzxz(v1);
|
||||
b = v0.ywyw(v1);
|
||||
c = v2.xzxz(v3);
|
||||
d = v2.ywyw(v3);
|
||||
/*
|
||||
GSVector4 v0 = a.upl(b);
|
||||
GSVector4 v1 = a.uph(b);
|
||||
GSVector4 v2 = c.upl(d);
|
||||
GSVector4 v3 = c.uph(d);
|
||||
|
||||
a = v0.l2h(v2);
|
||||
b = v2.h2l(v0);
|
||||
c = v1.l2h(v3);
|
||||
d = v3.h2l(v1);
|
||||
*/ }
|
||||
|
||||
void operator += (const GSVector4& v)
|
||||
{
|
||||
@@ -1604,6 +1694,11 @@ public:
|
||||
return GSVector4(_mm_cmple_ps(v1, v2));
|
||||
}
|
||||
|
||||
// Broadcast: returns a vector whose four floats are all copies of element i of this vector.
// i feeds _MM_SHUFFLE, so it is presumably expected to be in 0..3 - TODO confirm.
template<int i> GSVector4 shuffle() const
|
||||
{
|
||||
return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(i, i, i, i)));
|
||||
}
|
||||
|
||||
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
|
||||
GSVector4 xs##ys##zs##ws() const {return GSVector4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||
GSVector4 xs##ys##zs##ws(const GSVector4& v) const {return GSVector4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||
|
||||
@@ -105,6 +105,30 @@ BOOL GSdxApp::InitInstance()
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// Runtime probe for the instruction set this binary was compiled for (chosen by _M_SSE).
// Executes one intrinsic belonging to that level inside an SEH __try block.
// Returns false if executing it raises any structured exception (presumably an
// illegal-instruction fault on CPUs lacking the extension), true otherwise.
static bool CheckSSE()
|
||||
{
|
||||
__try
|
||||
{
|
||||
// Destination for the probe result; the value itself is never read back in this function.
static __m128i m;
|
||||
|
||||
// Exactly one probe is compiled in: the highest level that _M_SSE satisfies.
// NOTE(review): the probes use constant/zero operands; confirm the compiler still emits the
// instruction rather than folding it away, otherwise the check always passes.
#if _M_SSE >= 0x402
|
||||
m.m128i_i32[0] = _mm_popcnt_u32(1234);
|
||||
#elif _M_SSE >= 0x401
|
||||
m = _mm_packus_epi32(m, m);
|
||||
#elif _M_SSE >= 0x301
|
||||
m = _mm_alignr_epi8(m, m, 1);
|
||||
#elif _M_SSE >= 0x200
|
||||
m = _mm_packs_epi32(m, m);
|
||||
#endif
|
||||
}
|
||||
// EXCEPTION_EXECUTE_HANDLER: handle every structured exception raised in the __try body.
__except(EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
#define PS2E_LT_GS 0x01
|
||||
@@ -137,8 +161,10 @@ EXPORT_C_(char*) PS2EgetLibName()
|
||||
sl.AddTail(s);
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
sl.AddTail(_T("SSE4"));
|
||||
#if _M_SSE >= 0x402
|
||||
sl.AddTail(_T("SSE42"));
|
||||
#elif _M_SSE >= 0x401
|
||||
sl.AddTail(_T("SSE41"));
|
||||
#elif _M_SSE >= 0x301
|
||||
sl.AddTail(_T("SSSE3"));
|
||||
#elif _M_SSE >= 0x200
|
||||
@@ -221,6 +247,8 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
|
||||
{
|
||||
AFX_MANAGE_STATE(AfxGetStaticModuleState());
|
||||
|
||||
//
|
||||
|
||||
CString str;
|
||||
|
||||
str.Format(_T("d3dx9_%d.dll"), D3DX_SDK_VERSION);
|
||||
@@ -241,6 +269,18 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
|
||||
return -1;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
if(!CheckSSE())
|
||||
{
|
||||
CString str;
|
||||
str.Format(_T("This CPU does not support SSE %d.%02d"), _M_SSE >> 8, _M_SSE & 0xff);
|
||||
AfxMessageBox(str, MB_OK);
|
||||
return -1;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSclose();
|
||||
|
||||
// TODO
|
||||
@@ -555,6 +595,7 @@ EXPORT_C GSBenchmark(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow
|
||||
|
||||
//
|
||||
|
||||
//for(int tbw = 5; tbw <= 10; tbw++)
|
||||
for(int tbw = 5; tbw <= 10; tbw++)
|
||||
{
|
||||
int n = 256 << ((10 - tbw) * 2);
|
||||
|
||||
@@ -788,6 +788,7 @@
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug SSE4|Win32"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="..\common.vsprops;..\debug.vsprops;..\sse4.vsprops"
|
||||
UseOfMFC="1"
|
||||
|
||||
@@ -105,7 +105,7 @@
|
||||
|
||||
// sse4
|
||||
|
||||
#if _M_SSE >= 0x400
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
PreprocessorDefinitions="_M_SSE=0x401"
|
||||
EnableEnhancedInstructionSet="2"
|
||||
PreprocessorDefinitions="_M_SSE=0x400"
|
||||
/>
|
||||
</VisualStudioPropertySheet>
|
||||
|
||||
@@ -788,6 +788,7 @@
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug SSE4|Win32"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="..\common.vsprops;..\debug.vsprops;..\sse2.vsprops;..\sse4.vsprops"
|
||||
UseOfMFC="1"
|
||||
|
||||
Reference in New Issue
Block a user