This commit is contained in:
gabest
2008-07-29 16:36:45 +00:00
parent ca31d9b955
commit 6d7bb795e2
12 changed files with 173 additions and 253 deletions

View File

@@ -23,9 +23,10 @@
#include "GSClut.h"
#include "GSLocalMemory.h"
GSClut::GSClut()
GSClut::GSClut(const GSLocalMemory* mem)
: m_mem(mem)
{
BYTE* p = (BYTE*)_aligned_malloc(8192, 16);
BYTE* p = (BYTE*)VirtualAlloc(NULL, 8192, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
m_clut = (WORD*)&p[0]; // 1k + 1k for buffer overruns (sfex: PSM == PSM_PSMT8, CPSM == PSM_PSMCT32, CSA != 0)
m_buff32 = (DWORD*)&p[2048]; // 1k
@@ -63,31 +64,31 @@ GSClut::GSClut()
m_wc[0][PSM_PSMCT16S][PSM_PSMT4HL] = &GSClut::WriteCLUT16S_I4_CSM1;
m_wc[0][PSM_PSMCT16S][PSM_PSMT4HH] = &GSClut::WriteCLUT16S_I4_CSM1;
m_wc[1][PSM_PSMCT32][PSM_PSMT8] = &GSClut::WriteCLUT32_I8_CSM2;
m_wc[1][PSM_PSMCT32][PSM_PSMT8H] = &GSClut::WriteCLUT32_I8_CSM2;
m_wc[1][PSM_PSMCT32][PSM_PSMT4] = &GSClut::WriteCLUT32_I4_CSM2;
m_wc[1][PSM_PSMCT32][PSM_PSMT4HL] = &GSClut::WriteCLUT32_I4_CSM2;
m_wc[1][PSM_PSMCT32][PSM_PSMT4HH] = &GSClut::WriteCLUT32_I4_CSM2;
m_wc[1][PSM_PSMCT24][PSM_PSMT8] = &GSClut::WriteCLUT32_I8_CSM2;
m_wc[1][PSM_PSMCT24][PSM_PSMT8H] = &GSClut::WriteCLUT32_I8_CSM2;
m_wc[1][PSM_PSMCT24][PSM_PSMT4] = &GSClut::WriteCLUT32_I4_CSM2;
m_wc[1][PSM_PSMCT24][PSM_PSMT4HL] = &GSClut::WriteCLUT32_I4_CSM2;
m_wc[1][PSM_PSMCT24][PSM_PSMT4HH] = &GSClut::WriteCLUT32_I4_CSM2;
m_wc[1][PSM_PSMCT16][PSM_PSMT8] = &GSClut::WriteCLUT16_I8_CSM2;
m_wc[1][PSM_PSMCT16][PSM_PSMT8H] = &GSClut::WriteCLUT16_I8_CSM2;
m_wc[1][PSM_PSMCT16][PSM_PSMT4] = &GSClut::WriteCLUT16_I4_CSM2;
m_wc[1][PSM_PSMCT16][PSM_PSMT4HL] = &GSClut::WriteCLUT16_I4_CSM2;
m_wc[1][PSM_PSMCT16][PSM_PSMT4HH] = &GSClut::WriteCLUT16_I4_CSM2;
m_wc[1][PSM_PSMCT16S][PSM_PSMT8] = &GSClut::WriteCLUT16S_I8_CSM2;
m_wc[1][PSM_PSMCT16S][PSM_PSMT8H] = &GSClut::WriteCLUT16S_I8_CSM2;
m_wc[1][PSM_PSMCT16S][PSM_PSMT4] = &GSClut::WriteCLUT16S_I4_CSM2;
m_wc[1][PSM_PSMCT16S][PSM_PSMT4HL] = &GSClut::WriteCLUT16S_I4_CSM2;
m_wc[1][PSM_PSMCT16S][PSM_PSMT4HH] = &GSClut::WriteCLUT16S_I4_CSM2;
m_wc[1][PSM_PSMCT32][PSM_PSMT8] = &GSClut::WriteCLUT32_CSM2<256>;
m_wc[1][PSM_PSMCT32][PSM_PSMT8H] = &GSClut::WriteCLUT32_CSM2<256>;
m_wc[1][PSM_PSMCT32][PSM_PSMT4] = &GSClut::WriteCLUT32_CSM2<16>;
m_wc[1][PSM_PSMCT32][PSM_PSMT4HL] = &GSClut::WriteCLUT32_CSM2<16>;
m_wc[1][PSM_PSMCT32][PSM_PSMT4HH] = &GSClut::WriteCLUT32_CSM2<16>;
m_wc[1][PSM_PSMCT24][PSM_PSMT8] = &GSClut::WriteCLUT32_CSM2<256>;
m_wc[1][PSM_PSMCT24][PSM_PSMT8H] = &GSClut::WriteCLUT32_CSM2<256>;
m_wc[1][PSM_PSMCT24][PSM_PSMT4] = &GSClut::WriteCLUT32_CSM2<16>;
m_wc[1][PSM_PSMCT24][PSM_PSMT4HL] = &GSClut::WriteCLUT32_CSM2<16>;
m_wc[1][PSM_PSMCT24][PSM_PSMT4HH] = &GSClut::WriteCLUT32_CSM2<16>;
m_wc[1][PSM_PSMCT16][PSM_PSMT8] = &GSClut::WriteCLUT16_CSM2<256>;
m_wc[1][PSM_PSMCT16][PSM_PSMT8H] = &GSClut::WriteCLUT16_CSM2<256>;
m_wc[1][PSM_PSMCT16][PSM_PSMT4] = &GSClut::WriteCLUT16_CSM2<16>;
m_wc[1][PSM_PSMCT16][PSM_PSMT4HL] = &GSClut::WriteCLUT16_CSM2<16>;
m_wc[1][PSM_PSMCT16][PSM_PSMT4HH] = &GSClut::WriteCLUT16_CSM2<16>;
m_wc[1][PSM_PSMCT16S][PSM_PSMT8] = &GSClut::WriteCLUT16S_CSM2<256>;
m_wc[1][PSM_PSMCT16S][PSM_PSMT8H] = &GSClut::WriteCLUT16S_CSM2<256>;
m_wc[1][PSM_PSMCT16S][PSM_PSMT4] = &GSClut::WriteCLUT16S_CSM2<16>;
m_wc[1][PSM_PSMCT16S][PSM_PSMT4HL] = &GSClut::WriteCLUT16S_CSM2<16>;
m_wc[1][PSM_PSMCT16S][PSM_PSMT4HH] = &GSClut::WriteCLUT16S_CSM2<16>;
}
GSClut::~GSClut()
{
_aligned_free(m_clut);
VirtualFree(m_clut, 0, MEM_RELEASE);
}
void GSClut::Invalidate()
@@ -95,30 +96,7 @@ void GSClut::Invalidate()
m_write.dirty = true;
}
// Reports whether the cached write state is stale: true when the dirty flag
// is set, or when either the TEX0 or the TEXCLUT raw 64-bit value differs
// from the one recorded in m_write.
bool GSClut::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
	if(m_write.dirty)
	{
		return true;
	}

	if(m_write.TEX0.i64 != TEX0.i64)
	{
		return true;
	}

	return m_write.TEXCLUT.i64 != TEXCLUT.i64;
}
// Decides whether a write with the given TEX0/TEXCLUT would take place.
// The TEX0.CLD field selects the rule; when the rule does not reject the
// write outright, the answer is whatever IsDirty() reports for these registers.
// NOTE(review): the meaning of each CLD value comes from the GS register
// specification, which is not visible here -- confirm against the hardware docs.
bool GSClut::IsWriting(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
switch(TEX0.CLD)
{
// CLD == 0: never writes.
case 0: return false;
// CLD == 1..3: no extra condition, fall through to the dirty check below.
case 1: break;
case 2: break;
case 3: break;
// CLD == 4 / 5: rejected when TEX0.CBP equals the stored m_CBP[0] / m_CBP[1].
case 4: if(m_CBP[0] == TEX0.CBP) return false; break;
case 5: if(m_CBP[1] == TEX0.CBP) return false; break;
// CLD == 6 / 7: treated as invalid -- assert in debug builds, report "no write".
case 6: ASSERT(0); return false;
case 7: ASSERT(0); return false;
// MSVC optimizer hint: tells the compiler no other value can occur here.
default: __assume(0);
}
return IsDirty(TEX0, TEXCLUT);
}
bool GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
switch(TEX0.CLD)
{
@@ -133,146 +111,103 @@ bool GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const G
default: __assume(0);
}
if(!IsDirty(TEX0, TEXCLUT))
{
return false;
}
// FIXME: return m_write.IsDirty(TEX0, TEXCLUT);
return m_write.dirty || !(GSVector4i::load<true>(&m_write) == GSVector4i::load(&TEX0, &TEXCLUT)).alltrue();
}
void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
m_write.TEX0 = TEX0;
m_write.TEXCLUT = TEXCLUT;
m_write.dirty = false;
m_read.dirty = true;
(this->*m_wc[TEX0.CSM][TEX0.CPSM][TEX0.PSM])(TEX0, TEXCLUT, mem);
return true;
(this->*m_wc[TEX0.CSM][TEX0.CPSM][TEX0.PSM])(TEX0, TEXCLUT);
}
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
ASSERT(TEX0.CSA == 0);
WriteCLUT_T32_I8_CSM1(&mem->m_vm32[mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T32_I8_CSM1(&m_mem->m_vm32[m_mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
ASSERT(TEX0.CSA < 16);
WriteCLUT_T32_I4_CSM1(&mem->m_vm32[mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T32_I4_CSM1(&m_mem->m_vm32[m_mem->BlockAddress32(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
void GSClut::WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
ASSERT(TEX0.CSA < 16);
WriteCLUT_T16_I8_CSM1(&mem->m_vm16[mem->BlockAddress16(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I8_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
void GSClut::WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
ASSERT(TEX0.CSA < 32);
WriteCLUT_T16_I4_CSM1(&mem->m_vm16[mem->BlockAddress16(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I4_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
void GSClut::WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
WriteCLUT_T16_I8_CSM1(&mem->m_vm16[mem->BlockAddress16S(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I8_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16S(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
WriteCLUT_T16_I4_CSM1(&mem->m_vm16[mem->BlockAddress16S(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
WriteCLUT_T16_I4_CSM1(&m_mem->m_vm16[m_mem->BlockAddress16S(0, 0, TEX0.CBP, 1)], m_clut + (TEX0.CSA << 4));
}
void GSClut::WriteCLUT32_I8_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
// Copies n 32-bit pixels from local memory into the CLUT buffer.
// Each pixel is read at base + offset[i], where base is the address returned by
// PixelAddress32(0, COV, CBP, CBW) and offset starts at rowOffset32[COU << 4].
// The pixel is stored split: low 16 bits in clut[i], high 16 bits in clut[i + 256].
// n is the number of entries to copy (instantiated with 16 and 256 by the
// constructor's dispatch table).
template<int n> void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
DWORD bp = TEX0.CBP;
DWORD bw = TEXCLUT.CBW;
WORD* clut = m_clut + (TEX0.CSA << 4);
for(int i = 0, x = TEXCLUT.COU << 4, y = TEXCLUT.COV; i < 256; i++, x++)
{
DWORD dw = mem->ReadPixel32(x, y, bp, bw);
DWORD base = m_mem->PixelAddress32(0, TEXCLUT.COV, TEX0.CBP, TEXCLUT.CBW);
int* offset = &m_mem->rowOffset32[TEXCLUT.COU << 4];
clut[i] = (WORD)(dw & 0xffff);
clut[i + 256] = (WORD)(dw >> 16);
for(int i = 0; i < n; i++)
{
// Keep all 32 bits of the pixel: truncating to WORD here would make
// (c >> 16) always zero and wipe the upper half stored at clut[i + 256].
DWORD c = m_mem->ReadPixel32(base + offset[i]);
clut[i] = (WORD)(c & 0xffff);
clut[i + 256] = (WORD)(c >> 16);
}
}
void GSClut::WriteCLUT32_I4_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
template<int n> void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
DWORD bp = TEX0.CBP;
DWORD bw = TEXCLUT.CBW;
WORD* clut = m_clut + (TEX0.CSA << 4);
for(int i = 0, x = TEXCLUT.COU << 4, y = TEXCLUT.COV; i < 16; i++, x++)
{
DWORD dw = mem->ReadPixel32(x, y, bp, bw);
DWORD base = m_mem->PixelAddress16(0, TEXCLUT.COV, TEX0.CBP, TEXCLUT.CBW);
int* offset = &m_mem->rowOffset16[TEXCLUT.COU << 4];
clut[i] = (WORD)(dw & 0xffff);
clut[i + 256] = (WORD)(dw >> 16);
for(int i = 0; i < n; i++)
{
clut[i] = (WORD)m_mem->ReadPixel16(base + offset[i]);
}
}
void GSClut::WriteCLUT16_I8_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
template<int n> void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
DWORD bp = TEX0.CBP;
DWORD bw = TEXCLUT.CBW;
WORD* clut = m_clut + (TEX0.CSA << 4);
for(int i = 0, x = TEXCLUT.COU << 4, y = TEXCLUT.COV; i < 256; i++, x++)
DWORD base = m_mem->PixelAddress16S(0, TEXCLUT.COV, TEX0.CBP, TEXCLUT.CBW);
int* offset = &m_mem->rowOffset16S[TEXCLUT.COU << 4];
for(int i = 0; i < n; i++)
{
clut[i] = (WORD)mem->ReadPixel16(x, y, bp, bw);
}
}
void GSClut::WriteCLUT16_I4_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
{
DWORD bp = TEX0.CBP;
DWORD bw = TEXCLUT.CBW;
WORD* clut = m_clut + (TEX0.CSA << 4);
for(int i = 0, x = TEXCLUT.COU << 4, y = TEXCLUT.COV; i < 16; i++, x++)
{
clut[i] = (WORD)mem->ReadPixel16(x, y, bp, bw);
}
}
void GSClut::WriteCLUT16S_I8_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
{
DWORD bp = TEX0.CBP;
DWORD bw = TEXCLUT.CBW;
WORD* clut = m_clut + (TEX0.CSA << 4);
for(int i = 0, x = TEXCLUT.COU << 4, y = TEXCLUT.COV; i < 256; i++, x++)
{
clut[i] = (WORD)mem->ReadPixel16S(x, y, bp, bw);
}
}
void GSClut::WriteCLUT16S_I4_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem)
{
DWORD bp = TEX0.CBP;
DWORD bw = TEXCLUT.CBW;
WORD* clut = m_clut + (TEX0.CSA << 4);
for(int i = 0, x = TEXCLUT.COU << 4, y = TEXCLUT.COV; i < 16; i++, x++)
{
clut[i] = (WORD)mem->ReadPixel16S(x, y, bp, bw);
clut[i] = (WORD)m_mem->ReadPixel16(base + offset[i]);
}
}
void GSClut::Read(const GIFRegTEX0& TEX0)
{
if(m_read.dirty || m_read.TEX0.i64 != TEX0.i64)
if(m_read.IsDirty(TEX0))
{
m_read.TEX0 = TEX0;
m_read.dirty = false;
@@ -314,7 +249,7 @@ void GSClut::Read(const GIFRegTEX0& TEX0)
void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
if(m_read.dirty || m_read.TEX0.i64 != TEX0.i64 || m_read.TEXA.i64 != TEXA.i64)
if(m_read.IsDirty(TEX0, TEXA))
{
m_read.TEX0 = TEX0;
m_read.TEXA = TEXA;
@@ -847,3 +782,20 @@ void GSClut::Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, cons
#endif
}
//
bool GSClut::WriteState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
{
return dirty || !(GSVector4i::load<true>(this) == GSVector4i::load(&TEX0, &TEXCLUT)).alltrue();
}
bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0)
{
return dirty || !(GSVector4i::load<true>(this) == GSVector4i::load(&TEX0, &this->TEXA)).alltrue();
}
bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
return dirty || !(GSVector4i::load<true>(this) == GSVector4i::load(&TEX0, &TEXA)).alltrue();
}

View File

@@ -24,49 +24,52 @@
#include "GS.h"
#include "GSVector.h"
#include "GSTables.h"
#include "GSAlignedClass.h"
class GSLocalMemory;
class GSClut
__declspec(align(16)) class GSClut : public GSAlignedClass<16>
{
const GSLocalMemory* m_mem;
DWORD m_CBP[2];
WORD* m_clut;
DWORD* m_buff32;
UINT64* m_buff64;
struct
__declspec(align(16)) struct WriteState
{
GIFRegTEX0 TEX0;
GIFRegTEXCLUT TEXCLUT;
bool dirty;
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
} m_write;
struct
__declspec(align(16)) struct ReadState
{
GIFRegTEX0 TEX0;
GIFRegTEXA TEXA;
bool dirty;
bool IsDirty(const GIFRegTEX0& TEX0);
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
} m_read;
typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
writeCLUT m_wc[2][16][64];
void WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT32_I8_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT32_I4_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16_I8_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16_I4_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16S_I8_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
void WriteCLUT16S_I4_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
template<int n> void WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
template<int n> void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem) {}
void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) {}
static void WriteCLUT_T32_I8_CSM1(const DWORD* RESTRICT src, WORD* RESTRICT clut);
static void WriteCLUT_T32_I4_CSM1(const DWORD* RESTRICT src, WORD* RESTRICT clut);
@@ -88,13 +91,12 @@ class GSClut
static void Expand16(const WORD* RESTRICT src, DWORD* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
public:
GSClut();
GSClut(const GSLocalMemory* mem);
virtual ~GSClut();
void Invalidate();
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
bool IsWriting(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
bool Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT, const GSLocalMemory* mem);
bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void Read(const GIFRegTEX0& TEX0);
void Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);

View File

@@ -43,10 +43,6 @@ public:
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
GSLocalMemory::psm_t* ftbl;
GSLocalMemory::psm_t* ztbl;
GSLocalMemory::psm_t* ttbl;
__declspec(align(16)) struct
{
GSVector4i dx10;
@@ -55,10 +51,7 @@ public:
GSVector4 sw;
} scissor;
GSDrawingContext()
: ftbl(NULL)
, ztbl(NULL)
, ttbl(NULL)
GSDrawingContext()
{
Reset();
}

View File

@@ -73,6 +73,7 @@ GSLocalMemory::psm_t GSLocalMemory::m_psm[64];
//
GSLocalMemory::GSLocalMemory()
: m_clut(this)
{
// TODO: MEM_WRITE_WATCH

View File

@@ -104,6 +104,10 @@ protected:
return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
}
// TODO
friend class GSClut;
public:
GSLocalMemory();
virtual ~GSLocalMemory();

View File

@@ -208,9 +208,9 @@ int GSRasterizer::Draw(Vertex* vertices, int count)
// m_slenv
m_slenv.steps = 0;
m_slenv.rtx = context->ttbl->rtx;
m_slenv.fo = context->ftbl->rowOffset[0];
m_slenv.zo = context->ztbl->rowOffset[0];
m_slenv.rtx = GSLocalMemory::m_psm[context->TEX0.PSM].rtx;
m_slenv.fo = GSLocalMemory::m_psm[context->FRAME.PSM].rowOffset[0];
m_slenv.zo = GSLocalMemory::m_psm[context->ZBUF.PSM].rowOffset[0];
m_slenv.fm = GSVector4i(context->FRAME.FBMSK);
m_slenv.zm = GSVector4i(context->ZBUF.ZMSK ? 0xffffffff : 0);
m_slenv.datm = GSVector4i(context->TEST.DATM ? 0x80000000 : 0);
@@ -733,9 +733,11 @@ void GSRasterizer::SetupColumnOffset()
m_fbco->hash = hash;
GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[context->FRAME.PSM].pa;
for(int i = 0, j = 1024; i < j; i++)
{
m_fbco->addr[i] = GSVector4i((int)context->ftbl->pa(0, i, context->FRAME.Block(), context->FRAME.FBW));
m_fbco->addr[i] = GSVector4i((int)pa(0, i, context->FRAME.Block(), context->FRAME.FBW));
}
m_comap.SetAt(hash, m_fbco);
@@ -760,9 +762,11 @@ void GSRasterizer::SetupColumnOffset()
m_zbco->hash = hash;
GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[context->ZBUF.PSM].pa;
for(int i = 0, j = 1024; i < j; i++)
{
m_zbco->addr[i] = GSVector4i((int)context->ztbl->pa(0, i, context->ZBUF.Block(), context->FRAME.FBW));
m_zbco->addr[i] = GSVector4i((int)pa(0, i, context->ZBUF.Block(), context->FRAME.FBW));
}
m_comap.SetAt(hash, m_zbco);
@@ -883,7 +887,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
default: __assume(0);
}
if(test.mask() == 0xffff)
if(test.alltrue())
{
continue;
}
@@ -997,7 +1001,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
fm |= t;
zm |= t;
test |= t;
if(test.mask() == 0xffff) continue;
if(test.alltrue()) continue;
break;
case 1:
zm |= t;
@@ -1061,7 +1065,7 @@ void GSRasterizer::DrawScanline(int top, int left, int right, const Vertex& v)
{
test |= (d ^ m_slenv.datm).sra32(31);
if(test.mask() == 0xffff)
if(test.alltrue())
{
continue;
}

View File

@@ -774,7 +774,7 @@ void GSRasterizer::DrawScanlineEx(int top, int left, int right, const Vertex& v)
default: __assume(0);
}
if(test.mask() == 0xffff)
if(test.alltrue())
{
continue;
}
@@ -888,7 +888,7 @@ void GSRasterizer::DrawScanlineEx(int top, int left, int right, const Vertex& v)
fm |= t;
zm |= t;
test |= t;
if(test.mask() == 0xffff) continue;
if(test.alltrue()) continue;
break;
case 1:
zm |= t;
@@ -952,7 +952,7 @@ void GSRasterizer::DrawScanlineEx(int top, int left, int right, const Vertex& v)
{
test |= (d ^ m_slenv.datm).sra32(31);
if(test.mask() == 0xffff)
if(test.alltrue())
{
continue;
}

View File

@@ -137,14 +137,6 @@ void GSState::Reset()
m_context = &m_env.CTXT[0];
m_env.CTXT[0].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[0].FRAME.PSM];
m_env.CTXT[0].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[0].ZBUF.PSM];
m_env.CTXT[0].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[0].TEX0.PSM];
m_env.CTXT[1].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[1].FRAME.PSM];
m_env.CTXT[1].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[1].ZBUF.PSM];
m_env.CTXT[1].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[1].TEX0.PSM];
m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM);
InvalidateTextureCache();
@@ -587,7 +579,9 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
{
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
if(PRIM->CTXT == i && m_env.CTXT[i].TEX0.i64 != r->TEX0.i64 || m_mem.m_clut.IsWriting(r->TEX0, m_env.TEXCLUT))
bool wt = m_mem.m_clut.WriteTest(r->TEX0, m_env.TEXCLUT);
if(wt || PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].TEX0) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -604,14 +598,15 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
m_env.CTXT[i].TEX0.TBW &= ~1; // GS User 2.6
}
m_env.CTXT[i].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[i].TEX0.PSM];
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT, &m_mem);
if(wt)
{
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
}
}
template<int i> void GSState::GIFRegHandlerCLAMP(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].CLAMP.i64 != r->CLAMP.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].CLAMP) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -652,7 +647,7 @@ void GSState::GIFRegHandlerNOP(GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEX1(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].TEX1.i64 != r->TEX1.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].TEX1) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -673,7 +668,7 @@ template<int i> void GSState::GIFRegHandlerTEX2(GIFReg* r)
template<int i> void GSState::GIFRegHandlerXYOFFSET(GIFReg* r)
{
if(m_env.CTXT[i].XYOFFSET.i64 != r->XYOFFSET.i64)
if(!(GSVector4i::loadl(&m_env.CTXT[i].XYOFFSET) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -685,7 +680,7 @@ template<int i> void GSState::GIFRegHandlerXYOFFSET(GIFReg* r)
void GSState::GIFRegHandlerPRMODECONT(GIFReg* r)
{
if(m_env.PRMODECONT.i64 != r->PRMODECONT.i64)
if(!(GSVector4i::loadl(&m_env.PRMODECONT) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -717,7 +712,7 @@ void GSState::GIFRegHandlerPRMODE(GIFReg* r)
void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
{
if(m_env.TEXCLUT.i64 != r->TEXCLUT.i64)
if(!(GSVector4i::loadl(&m_env.TEXCLUT) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -727,7 +722,7 @@ void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
{
if(m_env.SCANMSK.i64 != r->SCANMSK.i64)
if(!(GSVector4i::loadl(&m_env.SCANMSK) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -737,7 +732,7 @@ void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].MIPTBP1.i64 != r->MIPTBP1.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].MIPTBP1) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -747,7 +742,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].MIPTBP2.i64 != r->MIPTBP2.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].MIPTBP2) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -757,7 +752,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
void GSState::GIFRegHandlerTEXA(GIFReg* r)
{
if(m_env.TEXA.i64 != r->TEXA.i64)
if(!(GSVector4i::loadl(&m_env.TEXA) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -767,7 +762,7 @@ void GSState::GIFRegHandlerTEXA(GIFReg* r)
void GSState::GIFRegHandlerFOGCOL(GIFReg* r)
{
if(m_env.FOGCOL.i64 != r->FOGCOL.i64)
if(!(GSVector4i::loadl(&m_env.FOGCOL) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -784,7 +779,7 @@ void GSState::GIFRegHandlerTEXFLUSH(GIFReg* r)
template<int i> void GSState::GIFRegHandlerSCISSOR(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].SCISSOR.i64 != r->SCISSOR.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].SCISSOR) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -796,7 +791,7 @@ template<int i> void GSState::GIFRegHandlerSCISSOR(GIFReg* r)
template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].ALPHA.i64 != r->ALPHA.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].ALPHA) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -815,7 +810,7 @@ template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
void GSState::GIFRegHandlerDIMX(GIFReg* r)
{
if(m_env.DIMX.i64 != r->DIMX.i64)
if(!(GSVector4i::loadl(&m_env.DIMX) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -825,7 +820,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r)
// Handler for the DTHE register: pending work is flushed before the register
// changes, i.e. only when the incoming value differs from m_env.DTHE.
void GSState::GIFRegHandlerDTHE(GIFReg* r)
{
if(m_env.DTHE.i64 != r->DTHE.i64)
// The vector compare yields "all true" when the values are EQUAL, so it must
// be negated to keep the "flush on change" behaviour of the i64 test above
// and of every other register handler.
if(!(GSVector4i::loadl(&m_env.DTHE) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -835,7 +830,7 @@ void GSState::GIFRegHandlerDTHE(GIFReg* r)
void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
{
if(m_env.COLCLAMP.i64 != r->COLCLAMP.i64)
if(!(GSVector4i::loadl(&m_env.COLCLAMP) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -845,7 +840,7 @@ void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].TEST.i64 != r->TEST.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].TEST) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -855,7 +850,7 @@ template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
void GSState::GIFRegHandlerPABE(GIFReg* r)
{
if(m_env.PABE.i64 != r->PABE.i64)
if(!(GSVector4i::loadl(&m_env.PABE) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -865,7 +860,7 @@ void GSState::GIFRegHandlerPABE(GIFReg* r)
template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].FBA.i64 != r->FBA.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].FBA) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -875,14 +870,12 @@ template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
template<int i> void GSState::GIFRegHandlerFRAME(GIFReg* r)
{
if(PRIM->CTXT == i && m_env.CTXT[i].FRAME.i64 != r->FRAME.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].FRAME) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
m_env.CTXT[i].FRAME = r->FRAME;
m_env.CTXT[i].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[i].FRAME.PSM];
}
template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
@@ -896,7 +889,7 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
r->ZBUF.PSM |= 0x30;
if(PRIM->CTXT == i && m_env.CTXT[i].ZBUF.i64 != r->ZBUF.i64)
if(PRIM->CTXT == i && !(GSVector4i::loadl(&m_env.CTXT[i].ZBUF) == GSVector4i::loadl(r)).alltrue())
{
Flush();
}
@@ -910,13 +903,11 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
{
m_env.CTXT[i].ZBUF.PSM = PSM_PSMZ32;
}
m_env.CTXT[i].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[i].ZBUF.PSM];
}
void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
{
if(m_env.BITBLTBUF.i64 != r->BITBLTBUF.i64)
if(!(GSVector4i::loadl(&m_env.BITBLTBUF) == GSVector4i::loadl(r)).alltrue())
{
FlushWrite();
}
@@ -936,7 +927,7 @@ void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
{
if(m_env.TRXPOS.i64 != r->TRXPOS.i64)
if(!(GSVector4i::loadl(&m_env.TRXPOS) == GSVector4i::loadl(r)).alltrue())
{
FlushWrite();
}
@@ -946,7 +937,8 @@ void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
void GSState::GIFRegHandlerTRXREG(GIFReg* r)
{
if(m_env.TRXREG.i64 != r->TRXREG.i64 || m_env.TRXREG2.i64 != r->TRXREG.i64)
if(!(GSVector4i::loadl(&m_env.TRXREG) == GSVector4i::loadl(r)).alltrue()
|| !(GSVector4i::loadl(&m_env.TRXREG2) == GSVector4i::loadl(r)).alltrue())
{
FlushWrite();
}
@@ -1590,14 +1582,7 @@ int GSState::Defrost(const freezeData* fd)
m_vprim = GSUtil::GetPrimVertexCount(PRIM->PRIM);
m_env.CTXT[0].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[0].FRAME.PSM];
m_env.CTXT[0].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[0].ZBUF.PSM];
m_env.CTXT[0].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[0].TEX0.PSM];
m_env.CTXT[0].UpdateScissor();
m_env.CTXT[1].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[1].FRAME.PSM];
m_env.CTXT[1].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[1].ZBUF.PSM];
m_env.CTXT[1].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[1].TEX0.PSM];
m_env.CTXT[1].UpdateScissor();
m_perfmon.SetFrame(5000);

View File

@@ -261,22 +261,6 @@ public:
GSRenderTarget* GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb = false)
{
POSITION pos = m_tex.GetHeadPosition();
/*
while(pos)
{
POSITION cur = pos;
GSTexture* t = m_tex.GetNext(pos);
if(GSUtil::HasSharedBits(TEX0.TBP0, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
m_tex.RemoveAt(cur);
delete t;
}
}
*/
GSRenderTarget* rt = NULL;
if(rt == NULL)
@@ -363,22 +347,6 @@ public:
GSDepthStencil* GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h)
{
POSITION pos = m_tex.GetHeadPosition();
/*
while(pos)
{
POSITION cur = pos;
GSTexture* t = m_tex.GetNext(pos);
if(GSUtil::HasSharedBits(TEX0.TBP0, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
m_tex.RemoveAt(cur);
delete t;
}
}
*/
GSDepthStencil* ds = NULL;
if(ds == NULL)

View File

@@ -279,6 +279,7 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt)
int dw = (int)m_TEX0.TBW << 6;
int dh = 1 << m_TEX0.TH;
if(sw != 0)
for(int dy = 0; dy < dh; dy += bh)
{
for(int dx = 0; dx < dw; dx += bw)

View File

@@ -46,7 +46,7 @@ public:
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
if(((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).mask() != 0xffff)
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
{
a[0] = b0;
a[1] = b1;
@@ -106,7 +106,7 @@ public:
GSVector4i b3 = b[3];
GSVector4i b4 = b[4];
if(((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4)).mask() != 0xffff)
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4)).alltrue())
{
a[0] = b0;
a[1] = b1;

View File

@@ -588,6 +588,11 @@ public:
return _mm_movemask_epi8(m);
}
// True when every one of the 16 bytes of the vector has its top bit set,
// i.e. the byte movemask equals 0xffff (the result of an all-lanes-equal compare).
bool alltrue() const
{
	const int bits = _mm_movemask_epi8(m);

	return bits == 0xffff;
}
template<int i> GSVector4i insert16(int a) const
{
return GSVector4i(_mm_insert_epi16(m, a, i));
@@ -1268,7 +1273,7 @@ public:
v &= d[i] == s[i];
}
return v.mask() == 0xffff;
return v.alltrue();
}
__forceinline static bool update(const void* dst, const void* src, int size)
@@ -1289,7 +1294,7 @@ public:
d[i] = s[i];
}
return v.mask() == 0xffff;
return v.alltrue();
}
void operator += (const GSVector4i& v)
@@ -1715,6 +1720,11 @@ public:
return _mm_movemask_ps(m);
}
// True when all four float lanes have their sign bit set, i.e. the
// packed-single movemask equals 0xf.
bool alltrue() const
{
	const int bits = _mm_movemask_ps(m);

	return bits == 0xf;
}
static GSVector4 zero()
{
return GSVector4(_mm_setzero_ps());