GregMiscellaneous: zzogl-pg:

* Regroup the CLUT core functions into one big file

Note: the Code::Blocks project needs to be updated. And I hope the templates are MSVC-friendly :)


git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3931 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut@gmail.com 2010-10-17 15:49:34 +00:00
parent 01c171e9e7
commit 97cd280684
8 changed files with 242 additions and 763 deletions

View File

@ -46,6 +46,7 @@ endif(CMAKE_BUILD_TYPE STREQUAL Release)
# zzogl sources
set(zzoglSources
Clut.cpp
GifTransfer.cpp
GLWin32.cpp
GLWinX11.cpp
@ -77,6 +78,7 @@ set(zzoglSources
# zzogl headers
set(zzoglHeaders
Clut.h
common.h
CRC.h
GifTransfer.h

View File

@ -17,8 +17,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef HOSTMEMORY_H_INCLUDED
#define HOSTMEMORY_H_INCLUDED
#ifndef HOSTMEMORY_H_INCLUDED
#define HOSTMEMORY_H_INCLUDED
extern int GPU_TEXWIDTH;
@ -45,7 +45,7 @@ class GSClut
u8* get();
u8* get(u32 addr);
u8* get_raw(u32 addr);
};
};
// The size in bytes of x strings (of texture).
inline int MemorySize(int x)
@ -64,7 +64,7 @@ inline u8* _MemoryAddress(int x)
{
return g_pbyGSMemory + mult * x;
}
extern void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
@ -80,5 +80,5 @@ extern void TransferLocalLocal();
extern void TerminateLocalHost();
extern void TerminateHostLocal();
#endif // HOSTMEMORY_H_INCLUDED
#endif // HOSTMEMORY_H_INCLUDED

View File

@ -17,8 +17,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZKICK_H_INCLUDED
#define ZZKICK_H_INCLUDED
#ifndef ZZKICK_H_INCLUDED
#define ZZKICK_H_INCLUDED
#include "GS.h"
@ -43,7 +43,7 @@ extern void KickLine();
extern void KickTriangle();
extern void KickTriangleFan();
extern void KickSprite();
extern void KickDummy();
#endif // ZZKICK_H_INCLUDED
extern void KickDummy();
#endif // ZZKICK_H_INCLUDED

View File

@ -1,157 +1,157 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZLOG_H_INCLUDED
#define ZZLOG_H_INCLUDED
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZLOG_H_INCLUDED
#define ZZLOG_H_INCLUDED
#include "Util.h"
#include <cstring>
//Logging for errors that are called often should have a time counter.
#ifdef __LINUX__
static u32 __attribute__((unused)) lasttime = 0;
static u32 __attribute__((unused)) BigTime = 5000;
static bool __attribute__((unused)) SPAM_PASS;
#else
static u32 lasttime = 0;
static u32 BigTime = 5000;
static bool SPAM_PASS;
#endif
#define ERROR_LOG_SPAM(text) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
} \
}
// The same macro with one-argument substitution.
#define ERROR_LOG_SPAMA(fmt, value) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(fmt, value); \
lasttime = timeGetTime(); \
} \
}
#define ERROR_LOG_SPAM_TEST(text) {\
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
SPAM_PASS = true; \
} \
else \
SPAM_PASS = false; \
}
#if DEBUG_PROF
#define FILE_IS_IN_CHECK ((strcmp(__FILE__, "targets.cpp") == 0) || (strcmp(__FILE__, "ZZoglFlush.cpp") == 0))
#define FUNCLOG {\
static bool Was_Here = false; \
static unsigned long int waslasttime = 0; \
if (!Was_Here && FILE_IS_IN_CHECK) { \
Was_Here = true;\
ZZLog::Error_Log("%s:%d %s", __FILE__, __LINE__, __func__); \
waslasttime = timeGetTime(); \
} \
if (FILE_IS_IN_CHECK && (timeGetTime() - waslasttime > BigTime )) { \
Was_Here = false; \
} \
}
#else
#define FUNCLOG
#endif
//#define WRITE_GREG_LOGS
//#define WRITE_PRIM_LOGS
#if defined(_DEBUG) && !defined(ZEROGS_DEVBUILD)
#define ZEROGS_DEVBUILD
#endif
// sends a message to output window if assert fails
#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } }
#define BMSG_RETURN(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return; } }
#define BMSG_RETURNX(x, str, rtype) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return (##rtype); } }
#define B(x) { if( !(x) ) { ZZLog::Log(_#x"\n"); ZZLog::Log(#x"\n"); } }
#define B_RETURN(x) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return; } }
#define B_RETURNX(x, rtype) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return (##rtype); } }
#define B_G(x, action) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); action; } }
#define GL_REPORT_ERROR() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
HandleGLError(); \
} \
}
#ifdef _DEBUG
# define GL_REPORT_ERRORD() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
HandleGLError(); \
} \
}
#else
# define GL_REPORT_ERRORD()
#endif
inline const char *error_name(int err)
{
switch (err)
{
case GL_NO_ERROR:
return "GL_NO_ERROR";
case GL_INVALID_ENUM:
return "GL_INVALID_ENUM";
case GL_INVALID_VALUE:
return "GL_INVALID_VALUE";
case GL_INVALID_OPERATION:
return "GL_INVALID_OPERATION";
case GL_STACK_OVERFLOW:
return "GL_STACK_OVERFLOW";
case GL_STACK_UNDERFLOW:
return "GL_STACK_UNDERFLOW";
case GL_OUT_OF_MEMORY:
return "GL_OUT_OF_MEMORY";
case GL_TABLE_TOO_LARGE:
return "GL_TABLE_TOO_LARGE";
case GL_INVALID_FRAMEBUFFER_OPERATION:
return "GL_INVALID_FRAMEBUFFER_OPERATION";
default:
return "Unknown GL error";
}
#include <cstring>
// Logging for errors that are raised often is throttled with a time counter.
// The three variables below are `static` in a header, so every translation unit
// that includes this file gets its own copy of the throttle state:
//   lasttime  - timeGetTime() value stored when a throttled message was last logged.
//   BigTime   - minimum gap between two throttled messages, in timeGetTime() units
//               (presumably milliseconds -- confirm against the timeGetTime() in use).
//   SPAM_PASS - written by ERROR_LOG_SPAM_TEST to report whether it logged.
#ifdef __LINUX__
// The unused attribute is applied so that files which include this header
// without using the macros do not get flagged for unused variables.
static u32 __attribute__((unused)) lasttime = 0;
static u32 __attribute__((unused)) BigTime = 5000;
static bool __attribute__((unused)) SPAM_PASS;
#else
static u32 lasttime = 0;
static u32 BigTime = 5000;
static bool SPAM_PASS;
#endif
// Passes `text` to ZZLog::Error_Log only when more than BigTime has elapsed
// since lasttime, then stores the current time in lasttime.
// The expansion is a bare { } block, not a do { } while (0) statement.
#define ERROR_LOG_SPAM(text) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
} \
}
// Same throttling as ERROR_LOG_SPAM (shares lasttime and BigTime), but forwards
// a format string plus exactly one value to ZZLog::Error_Log.
#define ERROR_LOG_SPAMA(fmt, value) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(fmt, value); \
lasttime = timeGetTime(); \
} \
}
// Throttled logging that also reports its decision: SPAM_PASS is set to true
// when `text` was logged (more than BigTime elapsed since lasttime) and to
// false when the message was suppressed.
#define ERROR_LOG_SPAM_TEST(text) {\
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
SPAM_PASS = true; \
} \
else \
SPAM_PASS = false; \
}
// Function-entry tracing, active only when DEBUG_PROF is non-zero.
#if DEBUG_PROF
// True when __FILE__ is exactly "targets.cpp" or "ZZoglFlush.cpp".
// NOTE(review): strcmp needs an exact match, so a __FILE__ that carries a
// directory prefix would never pass this check -- confirm how the build
// spells __FILE__.
#define FILE_IS_IN_CHECK ((strcmp(__FILE__, "targets.cpp") == 0) || (strcmp(__FILE__, "ZZoglFlush.cpp") == 0))
// Placed at the top of a function body. Each expansion has its own static
// state: it logs "file:line function" once, then stays silent until more than
// BigTime has passed since that log, at which point it re-arms itself.
#define FUNCLOG {\
static bool Was_Here = false; \
static unsigned long int waslasttime = 0; \
if (!Was_Here && FILE_IS_IN_CHECK) { \
Was_Here = true;\
ZZLog::Error_Log("%s:%d %s", __FILE__, __LINE__, __func__); \
waslasttime = timeGetTime(); \
} \
if (FILE_IS_IN_CHECK && (timeGetTime() - waslasttime > BigTime )) { \
Was_Here = false; \
} \
}
#else
// Tracing disabled: FUNCLOG expands to nothing.
#define FUNCLOG
#endif
//#define WRITE_GREG_LOGS
//#define WRITE_PRIM_LOGS
#if defined(_DEBUG) && !defined(ZEROGS_DEVBUILD)
#define ZEROGS_DEVBUILD
#endif
// Lightweight assertion helpers. Each macro evaluates x and, when it is false,
// sends a message to the log and optionally leaves the calling function.
//   BMSG(x, str)                - log str.
//   BMSG_RETURN(x, str)         - log str, then `return;`.
//   BMSG_RETURNX(x, str, rtype) - log str, then `return (rtype);`.
//   B(x)                        - log the stringified condition.
//   B_RETURN(x)                 - log "file:line: condition", then `return;`.
//   B_RETURNX(x, rtype)         - log "file:line: condition", then `return (rtype);`.
//   B_G(x, action)              - log "file:line: condition", then execute `action`.
// rtype is substituted directly instead of being token-pasted with `##`:
// pasting `(` onto the argument does not form a valid preprocessing token and
// is rejected by GCC and Clang.
// NOTE(review): BMSG* and B send the same message to ZZLog::Log twice; this is
// kept unchanged -- confirm whether the duplicate is intentional.
#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } }
#define BMSG_RETURN(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return; } }
#define BMSG_RETURNX(x, str, rtype) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return (rtype); } }
#define B(x) { if( !(x) ) { ZZLog::Log(#x"\n"); ZZLog::Log(#x"\n"); } }
#define B_RETURN(x) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return; } }
#define B_RETURNX(x, rtype) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return (rtype); } }
#define B_G(x, action) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); action; } }
// Polls glGetError() once. If it reports anything other than GL_NO_ERROR, the
// error is logged as "file:line: gl error NAME(0xCODE)" via ZZLog::Error_Log
// and HandleGLError() is called. Active in every build configuration.
#define GL_REPORT_ERROR() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
HandleGLError(); \
} \
}
// Debug-only variant of the GL error check. With _DEBUG defined it polls
// glGetError() and logs any error through ZZLog::Error_Log; the call to
// HandleGLError() is commented out here, so the error is only reported.
// Without _DEBUG the macro expands to nothing.
#ifdef _DEBUG
# define GL_REPORT_ERRORD() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
/* HandleGLError();*/ \
} \
}
#else
# define GL_REPORT_ERRORD()
#endif
// Translates an OpenGL error code into the spelling of the matching GL_*
// constant. Codes that are not in the table yield "Unknown GL error".
inline const char *error_name(int err)
{
	struct ErrorEntry
	{
		int code;
		const char *name;
	};

	static const ErrorEntry known_errors[] =
	{
		{ GL_NO_ERROR,                      "GL_NO_ERROR" },
		{ GL_INVALID_ENUM,                  "GL_INVALID_ENUM" },
		{ GL_INVALID_VALUE,                 "GL_INVALID_VALUE" },
		{ GL_INVALID_OPERATION,             "GL_INVALID_OPERATION" },
		{ GL_STACK_OVERFLOW,                "GL_STACK_OVERFLOW" },
		{ GL_STACK_UNDERFLOW,               "GL_STACK_UNDERFLOW" },
		{ GL_OUT_OF_MEMORY,                 "GL_OUT_OF_MEMORY" },
		{ GL_TABLE_TOO_LARGE,               "GL_TABLE_TOO_LARGE" },
		{ GL_INVALID_FRAMEBUFFER_OPERATION, "GL_INVALID_FRAMEBUFFER_OPERATION" }
	};

	const int entry_count = (int)(sizeof(known_errors) / sizeof(known_errors[0]));

	for (int idx = 0; idx < entry_count; ++idx)
	{
		if (known_errors[idx].code == err) return known_errors[idx].name;
	}

	return "Unknown GL error";
}
struct MESSAGE
@ -164,38 +164,38 @@ struct MESSAGE
u32 dwTimeStamp;
};
extern void DrawText(const char* pstr, int left, int top, u32 color);
extern void __LogToConsole(const char *fmt, ...);
extern void ZZAddMessage(const char* pstr, u32 ms = 5000);
extern void SetAA(int mode);
extern bool ZZCreate(int width, int height);
extern void ZZDestroy(bool bD3D);
extern void StartCapture();
extern void StopCapture();
namespace ZZLog
{
extern bool IsLogging();
void SetDir(const char* dir);
extern void Open();
extern void Close();
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);
void WriteToScreen(const char* pstr, u32 ms = 5000);
extern void WriteToConsole(const char *fmt, ...);
extern void Print(const char *fmt, ...);
extern void WriteLn(const char *fmt, ...);
extern void Greg_Log(const char *fmt, ...);
extern void Prim_Log(const char *fmt, ...);
extern void GS_Log(const char *fmt, ...);
extern void Debug_Log(const char *fmt, ...);
extern void Dev_Log(const char *fmt, ...);
extern void Warn_Log(const char *fmt, ...);
extern void Error_Log(const char *fmt, ...);
};
#endif // ZZLOG_H_INCLUDED
extern void DrawText(const char* pstr, int left, int top, u32 color);
extern void __LogToConsole(const char *fmt, ...);
extern void ZZAddMessage(const char* pstr, u32 ms = 5000);
extern void SetAA(int mode);
extern bool ZZCreate(int width, int height);
extern void ZZDestroy(bool bD3D);
extern void StartCapture();
extern void StopCapture();
// Declarations of the plugin's logging functions; the definitions are not in
// this header. The functions taking (const char *fmt, ...) are variadic; the
// macros in this file call them with printf-style format strings such as
// "%s:%d: %s".
namespace ZZLog
{
extern bool IsLogging();
void SetDir(const char* dir);
extern void Open();
extern void Close();
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);
// ms defaults to 5000 -- presumably how long the text stays on screen, in
// milliseconds; confirm against the definition.
void WriteToScreen(const char* pstr, u32 ms = 5000);
extern void WriteToConsole(const char *fmt, ...);
extern void Print(const char *fmt, ...);
extern void WriteLn(const char *fmt, ...);
extern void Greg_Log(const char *fmt, ...);
extern void Prim_Log(const char *fmt, ...);
extern void GS_Log(const char *fmt, ...);
extern void Debug_Log(const char *fmt, ...);
extern void Dev_Log(const char *fmt, ...);
extern void Warn_Log(const char *fmt, ...);
extern void Error_Log(const char *fmt, ...);
};
#endif // ZZLOG_H_INCLUDED

View File

@ -17,8 +17,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZOGLSHOOTS_H_INCLUDED
#define ZZOGLSHOOTS_H_INCLUDED
#ifndef ZZOGLSHOOTS_H_INCLUDED
#define ZZOGLSHOOTS_H_INCLUDED
void SaveSnapshot(const char* filename);
bool SaveRenderTarget(const char* filename, int width, int height, int jpeg);
@ -30,6 +30,6 @@ void Delete_Avi_Capture();
void StartCapture();
void StopCapture();
void CaptureFrame();
#endif // ZZOGLSHOOTS_H_INCLUDED
void CaptureFrame();
#endif // ZZOGLSHOOTS_H_INCLUDED

View File

@ -26,6 +26,7 @@
#include "zerogs.h"
#include "targets.h"
#include "ZZoglShaders.h"
#include "Clut.h"
#ifdef ZEROGS_SSE2
#include <emmintrin.h>
@ -1642,87 +1643,6 @@ void CMemoryTargetMngr::Destroy()
listClearedTargets.clear();
}
// Compares a saved, tightly packed 16-bit palette with the live CLUT buffer.
//
//   pSavedBuffer - packed u16 palette entries captured earlier.
//   pClutBuffer  - starting position inside the CLUT buffer; only every second
//                  u16 is read from it (pClutBuffer[2*i]).
//   clutsize     - size of the saved palette in bytes; must be a multiple of 32.
//
// Returns 0 when every compared entry matches and 1 at the first mismatch.
int memcmp_clut16(u16* pSavedBuffer, u16* pClutBuffer, int clutsize)
{
	FUNCLOG

	assert((clutsize&31) == 0);

	// left > 0 only when csa < 16
	int left = 0;

	if (((u32)(uptr)pClutBuffer & 2) == 0)
	{
		left = (((u32)(uptr)pClutBuffer & 0x3ff) / 2) + clutsize - 512;

		// Only a positive remainder is split off into the second pass.
		// Subtracting a negative value would enlarge clutsize and make the
		// loop below read past the end of the saved palette. This matches the
		// guard used where the saved palette is filled from the CLUT buffer.
		if (left > 0) clutsize -= left;
	}

	// First pass: compare from the starting position.
	while (clutsize > 0)
	{
		for (int i = 0; i < 16; ++i)
		{
			if (pSavedBuffer[i] != pClutBuffer[2*i]) return 1;
		}

		clutsize -= 32;
		pSavedBuffer += 16;
		pClutBuffer += 32;
	}

	// Second pass: the remainder continues at the odd u16 slots at the start
	// of the CLUT buffer.
	if (left > 0)
	{
		pClutBuffer = (u16*)(g_pbyGSClut + 2);

		while (left > 0)
		{
			for (int i = 0; i < 16; ++i)
			{
				if (pSavedBuffer[i] != pClutBuffer[2*i]) return 1;
			}

			left -= 32;
			pSavedBuffer += 16;
			pClutBuffer += 32;
		}
	}

	return 0;
}
// Compiled out with #if 0; kept for reference only.
#if 0
// Checks whether the palette stored in this target (clut) still equals the
// current contents of the CLUT buffer for the palette selected by tex0.
// Returns true when they match and false at the first difference.
bool CMemoryTarget::ValidateClut(const tex0Info& tex0)
{
FUNCLOG
assert(tex0.psm == psm && PSMT_ISCLUT(psm) && cpsm == tex0.cpsm);
int nClutOffset = 0, clutsize = 0;
// 256 palette entries for 8-bit indexed formats, 16 otherwise.
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
if (PSMT_IS32BIT(tex0.cpsm)) // 32 bit
{
// 4 bytes per entry; the entry count is clamped to what remains after csa.
nClutOffset = 64 * tex0.csa;
clutsize = min(entries, 256 - tex0.csa * 16) * 4;
}
else
{
// 2 bytes per entry; csa >= 16 starts 2 bytes into the buffer.
nClutOffset = 32 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0);
clutsize = min(entries, 512 - tex0.csa * 16) * 2;
}
assert(clutsize == clut.size());
if (PSMT_IS32BIT(cpsm))
{
if (memcmp_mmx(&clut[0], g_pbyGSClut + nClutOffset, clutsize)) return false;
}
else
{
// 16-bit entries are interleaved in the CLUT buffer, so a dedicated compare is used.
if (memcmp_clut16((u16*)&clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize)) return false;
}
return true;
}
#endif
bool CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int endtex, bool bDeleteBadTex)
{
FUNCLOG
@ -1783,113 +1703,6 @@ bool CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int endtex,
return false;
}
// Expands CLUT-indexed texel data into direct-colour texels by looking every
// index up in pclut. Serves both 16 and 32 bit palettes: T is the palette
// entry type (this file instantiates it with u16 and u32).
//   psm    - indexed source format: PSMT8, PSMT4, PSMT8H, PSMT4HH or PSMT4HL.
//            Any other value trips assert(0).
//   height - number of rows to convert.
//   pclut  - palette, indexed by the value extracted from the source.
//   psrc   - source bytes holding the indices.
//   pdst   - destination texels, written eight at a time.
template <class T>
static __forceinline void BuildClut(u32 psm, u32 height, T* pclut, u8* psrc, T* pdst)
{
	switch (psm)
	{
		case PSMT8:
			// One index per source byte.
			for (u32 row = 0; row < height; ++row)
			{
				for (int group = 0; group < GPU_TEXWIDTH / 2; ++group)
				{
					for (int k = 0; k < 8; ++k)
						pdst[k] = pclut[psrc[k]];

					pdst += 8;
					psrc += 8;
				}
			}
			break;

		case PSMT4:
			// Two indices per source byte, low nibble first.
			for (u32 row = 0; row < height; ++row)
			{
				for (int group = 0; group < GPU_TEXWIDTH; ++group)
				{
					for (int k = 0; k < 4; ++k)
					{
						pdst[2 * k] = pclut[psrc[k] & 15];
						pdst[2 * k + 1] = pclut[psrc[k] >> 4];
					}

					pdst += 8;
					psrc += 4;
				}
			}
			break;

		case PSMT8H:
			// Index is the whole fourth byte of every 4-byte source group.
			for (u32 row = 0; row < height; ++row)
			{
				for (int group = 0; group < GPU_TEXWIDTH / 8; ++group)
				{
					for (int k = 0; k < 8; ++k)
						pdst[k] = pclut[psrc[4 * k + 3]];

					pdst += 8;
					psrc += 32;
				}
			}
			break;

		case PSMT4HH:
			// Index is the high nibble of the fourth byte of every 4-byte group.
			for (u32 row = 0; row < height; ++row)
			{
				for (int group = 0; group < GPU_TEXWIDTH / 8; ++group)
				{
					for (int k = 0; k < 8; ++k)
						pdst[k] = pclut[psrc[4 * k + 3] >> 4];

					pdst += 8;
					psrc += 32;
				}
			}
			break;

		case PSMT4HL:
			// Index is the low nibble of the fourth byte of every 4-byte group.
			for (u32 row = 0; row < height; ++row)
			{
				for (int group = 0; group < GPU_TEXWIDTH / 8; ++group)
				{
					for (int k = 0; k < 8; ++k)
						pdst[k] = pclut[psrc[4 * k + 3] & 15];

					pdst += 8;
					psrc += 32;
				}
			}
			break;

		default:
			assert(0);
	}
}
#define TARGET_THRESH 0x500
extern int g_MaxTexWidth, g_MaxTexHeight; // Maximum height & width of supported texture.
@ -1926,10 +1739,10 @@ int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const te
return 1;
if (PSMT_IS32BIT(tex0.cpsm)) {
if (memcmp_mmx(&it->clut[0], g_pbyGSClut + nClutOffset, clutsize))
if (Cmp_ClutBuffer_SavedClut<u32>((u32*)&it->clut[0], (u32*)(g_pbyGSClut + nClutOffset), clutsize))
return 2;
} else {
if (memcmp_clut16((u16*)&it->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize))
if (Cmp_ClutBuffer_SavedClut<u16>((u16*)&it->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize))
return 2;
}
@ -2136,38 +1949,9 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
targ->clut.resize(clutsize);
if (PSMT_IS32BIT(tex0.cpsm))
{
memcpy_amd(&targ->clut[0], g_pbyGSClut + nClutOffset, clutsize);
}
else
{
u16* pClutBuffer = (u16*)(g_pbyGSClut + nClutOffset);
u16* pclut = (u16*) & targ->clut[0];
int left = ((u32)nClutOffset & 2) ? 0 : ((nClutOffset & 0x3ff) / 2) + clutsize - 512;
if (left > 0) clutsize -= left;
while (clutsize > 0)
{
pclut[0] = pClutBuffer[0];
pclut++;
pClutBuffer += 2;
clutsize -= 2;
}
if (left > 0)
{
pClutBuffer = (u16*)(g_pbyGSClut + 2);
while (left > 0)
{
pclut[0] = pClutBuffer[0];
left -= 2;
pClutBuffer += 2;
pclut++;
}
}
}
ClutBuffer_to_Array<u32>((u32*)&targ->clut[0], (u32*)(g_pbyGSClut + nClutOffset), clutsize);
else
ClutBuffer_to_Array<u16>((u16*)&targ->clut[0], (u16*)(g_pbyGSClut + nClutOffset), clutsize);
}
if (targ->ptex != NULL)
@ -2226,14 +2010,14 @@ CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forc
u32* pclut = (u32*) & targ->clut[0];
u32* pdst = (u32*)ptexdata;
BuildClut<u32>(tex0.psm, targ->height, pclut, psrc, pdst);
Build_Clut_Texture<u32>(tex0.psm, targ->height, pclut, psrc, pdst);
}
else
{
u16* pclut = (u16*) & targ->clut[0];
u16* pdst = (u16*)ptexdata;
BuildClut<u16>(tex0.psm, targ->height, pclut, psrc, pdst);
Build_Clut_Texture<u16>(tex0.psm, targ->height, pclut, psrc, pdst);
}
}
else

View File

@ -594,6 +594,7 @@ void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpit
// }
//}
#if 0
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut)
{
__m128i* src = (__m128i*)vm;
@ -1137,9 +1138,11 @@ __forceinline void WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa)
vm += 16; // go down one column
}
}
#endif
#endif // ZEROGS_SSE2
#if 0
void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* _vm, u32* _clut)
{
const static u32 map[] =
@ -1251,6 +1254,8 @@ void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut)
dst[7] = src[7];
}
#endif
void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters)
{

View File

@ -33,10 +33,7 @@
#include "GLWin.h"
#include "ZZoglShaders.h"
#include "ZZKick.h"
#ifdef ZEROGS_SSE2
#include <emmintrin.h>
#endif
#include "Clut.h"
//----------------------- Defines
@ -518,217 +515,6 @@ void ExtWrite()
// case 7: ASSERT(0); return false;
// default: __assume(0);
// Reports whether the palette cached in the CLUT buffer differs from the
// palette currently stored in GS local memory.
//   highdword - packed texture register bits; cpsm, csm and csa are extracted from it.
//   psm       - texture pixel format; selects 256 entries (8-bit CLUT formats) or 16.
//   cld       - not used in this function body.
//   cbp       - palette base pointer; the source address is g_pbyGSMemory + cbp * 256.
// Returns true when a difference is found, and also (without comparing) when
// cpsm > 1 or csm is non-zero. Returns false when all compared entries match.
bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
{
int cpsm = ZZOglGet_cpsm_TexBits(highdword);
int csm = ZZOglGet_csm_TexBits(highdword);
if (cpsm > 1 || csm)
{
// Mana Khemia triggers this.
//ZZLog::Error_Log("16 bit clut not supported.");
return true;
}
int csa = ZZOglGet_csa_TexBits(highdword);
int entries = PSMT_IS8CLUT(psm) ? 256 : 16;
u64* src = (u64*)(g_pbyGSMemory + cbp * 256);
u64* dst = (u64*)(g_pbyGSClut + 64 * csa);
bool bRet = false;
// TEST_THIS is defined immediately before it is tested, so the intrinsic / C
// loop below is always the compiled path and the MMX assembly in the #else
// branch is never built.
#define TEST_THIS
#ifdef TEST_THIS
// Each iteration compares 16 palette entries (64 bytes of the CLUT buffer).
while(entries != 0) {
#ifdef ZEROGS_SSE2
// Note: local memory datas are swizzles
__m128i src_0 = _mm_load_si128((__m128i*)src); // 9 8 1 0
__m128i src_1 = _mm_load_si128((__m128i*)src+1); // 11 10 3 2
__m128i src_2 = _mm_load_si128((__m128i*)src+2); // 13 12 5 4
__m128i src_3 = _mm_load_si128((__m128i*)src+3); // 15 14 7 6
__m128i dst_0 = _mm_load_si128((__m128i*)dst);
__m128i dst_1 = _mm_load_si128((__m128i*)dst+1);
__m128i dst_2 = _mm_load_si128((__m128i*)dst+2);
__m128i dst_3 = _mm_load_si128((__m128i*)dst+3);
// The unpack operations put the source halves into CLUT-buffer order before
// the 32-bit equality compares; the four results are ANDed together.
__m128i result = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_0, src_1), dst_0);
__m128i result_tmp = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_2, src_3), dst_1);
result = _mm_and_si128(result, result_tmp);
result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_0, src_1), dst_2);
result = _mm_and_si128(result, result_tmp);
result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_2, src_3), dst_3);
result = _mm_and_si128(result, result_tmp);
// All 16 mask bits set means every compared byte was equal.
u32 result_int = _mm_movemask_epi8(result);
if (result_int != 0xFFFF) {
bRet = true;
break;
}
#else
// I see no point to keep an mmx version. SSE2 versions is probably faster.
// Keep a slow portable C version for reference/debug
// Note: local memory datas are swizzles
if (dst[0] != src[0] || dst[1] != src[2] || dst[2] != src[4] || dst[3] != src[6]
|| dst[4] != src[1] || dst[5] != src[3] || dst[6] != src[5] || dst[7] != src[7]) {
bRet = true;
break;
}
#endif
// go to the next memory block
src += 32;
// go back to the previous memory block then down one memory column
if (entries & 0x10) {
src -= (64-8);
}
// In case previous operation (down one column) cross the block boundary
// Go to the next block
if (entries == 0x90) {
src += 32;
}
dst += 8;
entries -= 16;
}
#else
// NOTE(review): this branch is dead while TEST_THIS is defined above. The GCC
// version below executes "cmp esi, 0x90" before "jne Continue1"; the MSVC
// version has no matching compare before its "jne Continue1", so the two
// variants do not appear to be equivalent -- verify before re-enabling.
// do a fast test with MMX
#ifdef _MSC_VER
int storeebx;
__asm
{
mov storeebx, ebx
mov edx, dst
mov ecx, src
mov ebx, entries
Start:
movq mm0, [edx]
movq mm1, [edx+8]
pcmpeqd mm0, [ecx]
pcmpeqd mm1, [ecx+16]
movq mm2, [edx+16]
movq mm3, [edx+24]
pcmpeqd mm2, [ecx+32]
pcmpeqd mm3, [ecx+48]
pand mm0, mm1
pand mm2, mm3
movq mm4, [edx+32]
movq mm5, [edx+40]
pcmpeqd mm4, [ecx+8]
pcmpeqd mm5, [ecx+24]
pand mm0, mm2
pand mm4, mm5
movq mm6, [edx+48]
movq mm7, [edx+56]
pcmpeqd mm6, [ecx+40]
pcmpeqd mm7, [ecx+56]
pand mm0, mm4
pand mm6, mm7
pand mm0, mm6
pmovmskb eax, mm0
cmp eax, 0xff
je Continue
mov bRet, 1
jmp Return
Continue:
cmp ebx, 16
jle Return
test ebx, 0x10
jz AddEcx
sub ecx, 448 // go back and down one column,
AddEcx:
add ecx, 256 // go to the right block
jne Continue1
add ecx, 256 // skip whole block
Continue1:
add edx, 64
sub ebx, 16
jmp Start
Return:
emms
mov ebx, storeebx
}
#else // linux
// do a fast test with MMX
__asm__(
".intel_syntax\n"
"Start:\n"
"movq %%mm0, [%%ecx]\n"
"movq %%mm1, [%%ecx+8]\n"
"pcmpeqd %%mm0, [%%edx]\n"
"pcmpeqd %%mm1, [%%edx+16]\n"
"movq %%mm2, [%%ecx+16]\n"
"movq %%mm3, [%%ecx+24]\n"
"pcmpeqd %%mm2, [%%edx+32]\n"
"pcmpeqd %%mm3, [%%edx+48]\n"
"pand %%mm0, %%mm1\n"
"pand %%mm2, %%mm3\n"
"movq %%mm4, [%%ecx+32]\n"
"movq %%mm5, [%%ecx+40]\n"
"pcmpeqd %%mm4, [%%edx+8]\n"
"pcmpeqd %%mm5, [%%edx+24]\n"
"pand %%mm0, %%mm2\n"
"pand %%mm4, %%mm5\n"
"movq %%mm6, [%%ecx+48]\n"
"movq %%mm7, [%%ecx+56]\n"
"pcmpeqd %%mm6, [%%edx+40]\n"
"pcmpeqd %%mm7, [%%edx+56]\n"
"pand %%mm0, %%mm4\n"
"pand %%mm6, %%mm7\n"
"pand %%mm0, %%mm6\n"
"pmovmskb %%eax, %%mm0\n"
"cmp %%eax, 0xff\n"
"je Continue\n"
".att_syntax\n"
"movb $1, %0\n"
".intel_syntax\n"
"jmp Return\n"
"Continue:\n"
"cmp %%esi, 16\n"
"jle Return\n"
"test %%esi, 0x10\n"
"jz AddEcx\n"
"sub %%edx, 448\n" // go back and down one column
"AddEcx:\n"
"add %%edx, 256\n" // go to the right block
"cmp %%esi, 0x90\n"
"jne Continue1\n"
"add %%edx, 256\n" // skip whole block
"Continue1:\n"
"add %%ecx, 64\n"
"sub %%esi, 16\n"
"jmp Start\n"
"Return:\n"
"emms\n"
".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
#endif // _WIN32
#endif
return bRet;
}
// cld state:
// 000 - clut data is not loaded; data in the temp buffer is stored
// 001 - clut data is always loaded.
@ -769,16 +555,29 @@ bool CheckChangeInClut(u32 highdword, u32 psm)
if (gs.cbp[1] == cbp) return false;
break;
//case 4: return gs.cbp[0] != cbp;
//case 5: return gs.cbp[1] != cbp;
// default: load
default:
break;
}
return IsDirty(highdword, psm, cld, cbp);
// Compare the cache with current memory
// CSM2 is not supported
if (ZZOglGet_csm_TexBits(highdword))
return true;
int cpsm = ZZOglGet_cpsm_TexBits(highdword);
int csa = ZZOglGet_csa_TexBits(highdword);
int entries = PSMT_IS8CLUT(psm) ? 256 : 16;
u8* GSMem = g_pbyGSMemory + cbp * 256;
if (PSMT_IS32BIT(cpsm))
return Cmp_ClutBuffer_GSMem<u32>((u32*)GSMem, csa, entries);
else {
// Mana Khemia triggers this.
//ZZLog::Error_Log("16 bit clut not supported.");
return Cmp_ClutBuffer_GSMem<u16>((u16*)GSMem, csa, entries);
}
}
void texClutWrite(int ctx)
@ -823,118 +622,7 @@ void texClutWrite(int ctx)
Flush(!ctx);
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
if (tex0.csm)
{
switch (tex0.cpsm)
{
// 16bit psm
// eggomania uses non16bit textures for csm2
case PSMCT16:
{
u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
u16 *dst = (u16*)(g_pbyGSClut + 64 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
for (int i = 0; i < entries; ++i)
{
*dst = src[getPixelAddress16_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
dst += 2;
// check for wrapping
if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
}
break;
}
case PSMCT16S:
{
u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
u16 *dst = (u16*)(g_pbyGSClut + 64 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
for (int i = 0; i < entries; ++i)
{
*dst = src[getPixelAddress16S_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
dst += 2;
// check for wrapping
if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
}
break;
}
case PSMCT32:
case PSMCT24:
{
u32* src = (u32*)g_pbyGSMemory + tex0.cbp * 64;
u32 *dst = (u32*)(g_pbyGSClut + 64 * tex0.csa);
// check if address exceeds src
if (src + getPixelAddress32_0(gs.clut.cou + entries - 1, gs.clut.cov, gs.clut.cbw) >= (u32*)g_pbyGSMemory + 0x00100000)
ZZLog::Error_Log("texClutWrite out of bounds.");
else
for (int i = 0; i < entries; ++i)
{
*dst = src[getPixelAddress32_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
dst++;
}
break;
}
default:
{
//ZZLog::Debug_Log("Unknown cpsm: %x (%x).", tex0.cpsm, tex0.psm);
break;
}
}
}
else
{
u32* src = (u32*)(g_pbyGSMemory + 256 * tex0.cbp);
if (entries == 16)
{
switch (tex0.cpsm)
{
case PSMCT24:
case PSMCT32:
WriteCLUT_T32_I4_CSM1(src, (u32*)(g_pbyGSClut + 64 * tex0.csa));
break;
default:
#ifdef ZEROGS_SSE2
WriteCLUT_T16_I4_CSM1_sse2(src, tex0.csa);
#else
WriteCLUT_T16_I4_CSM1_c(src, (u32*)(g_pbyGSClut + 64*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
#endif
break;
}
}
else
{
switch (tex0.cpsm)
{
case PSMCT24:
case PSMCT32:
WriteCLUT_T32_I8_CSM1(src, (u32*)(g_pbyGSClut + 64 * tex0.csa));
break;
default:
// sse2 for 256 is more complicated, so use regular
#ifdef ZEROGS_SSE2
WriteCLUT_T16_I8_CSM1_sse2(src, tex0.csa);
#else
WriteCLUT_T16_I8_CSM1_c(src, (u32*)(g_pbyGSClut + 64*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
#endif
break;
}
}
}
// Write the memory to clut buffer
GSMem_to_ClutBuffer(tex0);
}