This commit is contained in:
gabest
2008-12-27 13:00:04 +00:00
parent b2fe3a636a
commit 96d652e52d
14 changed files with 1844 additions and 1529 deletions

View File

@@ -22,8 +22,9 @@
#include "StdAfx.h"
#include "GPUDrawScanline.h"
GPUDrawScanline::GPUDrawScanline(GPUState* state)
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
: m_state(state)
, m_id(id)
{
Init();
}
@@ -34,7 +35,7 @@ GPUDrawScanline::~GPUDrawScanline()
// IDrawScanline
bool GPUDrawScanline::SetupDraw(const GSRasterizerData* data)
bool GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
{
GPUDrawingEnvironment& env = m_state->m_env;

View File

@@ -101,18 +101,20 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
protected:
GPUState* m_state;
int m_filter;
int m_dither;
int m_id;
public:
GPUDrawScanline(GPUState* state);
GPUDrawScanline(GPUState* state, int id);
virtual ~GPUDrawScanline();
// IDrawScanline
bool SetupDraw(const GSRasterizerData* data);
bool BeginDraw(const GSRasterizerData* data);
void EndDraw(const GSRasterizerStats& stats) {}
void SetupPrim(GS_PRIM_CLASS primclass, const GSVertexSW* vertices, const GSVertexSW& dscan);
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
DrawScanlinePtr GetDrawScanlinePtr();
void PrintStats() {}
};

View File

@@ -164,14 +164,15 @@ protected:
default: __assume(0);
}
int prims = m_rl.Draw(&data);
m_rl.Draw(&data);
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Prim, prims);
m_perfmon.Put(GSPerfMon::Draw, 1);
int pixels = m_rl.GetPixels();
m_perfmon.Put(GSPerfMon::Fillrate, pixels);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
// TODO

View File

@@ -96,7 +96,7 @@ public:
#else
const DWORD* d = &columnTable32[(i & 3) << 1][0];
const BYTE* d = &columnTable32[(i & 3) << 1][0];
for(int j = 0; j < 2; j++, d += 8, src += srcpitch)
{
@@ -138,7 +138,7 @@ public:
#else
const DWORD* d = &columnTable16[(i & 3) << 1][0];
const BYTE* d = &columnTable16[(i & 3) << 1][0];
for(int j = 0; j < 2; j++, d += 16, src += srcpitch)
{
@@ -182,7 +182,7 @@ public:
#else
const DWORD* d = &columnTable8[(i & 3) << 2][0];
const BYTE* d = &columnTable8[(i & 3) << 2][0];
for(int j = 0; j < 4; j++, d += 16, src += srcpitch)
{
@@ -229,7 +229,7 @@ public:
#else
const DWORD* d = &columnTable4[(i & 3) << 2][0];
const WORD* d = &columnTable4[(i & 3) << 2][0];
for(int j = 0; j < 4; j++, d += 32, src += srcpitch)
{
@@ -307,7 +307,7 @@ public:
#else
const DWORD* d = &columnTable32[0][0];
const BYTE* d = &columnTable32[0][0];
for(int j = 0; j < 8; j++, d += 8, src += srcpitch)
{
@@ -341,7 +341,7 @@ public:
#else
const DWORD* d = &columnTable16[0][0];
const BYTE* d = &columnTable16[0][0];
for(int j = 0; j < 8; j++, d += 16, src += srcpitch)
{
@@ -368,7 +368,7 @@ public:
#else
const DWORD* d = &columnTable8[0][0];
const BYTE* d = &columnTable8[0][0];
for(int j = 0; j < 16; j++, d += 16, src += srcpitch)
{
@@ -395,7 +395,7 @@ public:
#else
const DWORD* d = &columnTable4[0][0];
const WORD* d = &columnTable4[0][0];
for(int j = 0; j < 16; j++, d += 32, src += srcpitch)
{
@@ -434,7 +434,7 @@ public:
#else
const DWORD* s = &columnTable32[(i & 3) << 1][0];
const BYTE* s = &columnTable32[(i & 3) << 1][0];
for(int j = 0; j < 2; j++, s += 8, dst += dstpitch)
{
@@ -492,7 +492,7 @@ public:
#else
const DWORD* s = &columnTable16[(i & 3) << 1][0];
const BYTE* s = &columnTable16[(i & 3) << 1][0];
for(int j = 0; j < 2; j++, s += 16, dst += dstpitch)
{
@@ -573,7 +573,7 @@ public:
#else
const DWORD* s = &columnTable8[(i & 3) << 2][0];
const BYTE* s = &columnTable8[(i & 3) << 2][0];
for(int j = 0; j < 4; j++, s += 16, dst += dstpitch)
{
@@ -660,7 +660,7 @@ public:
#else
const DWORD* s = &columnTable4[(i & 3) << 2][0];
const WORD* s = &columnTable4[(i & 3) << 2][0];
for(int j = 0; j < 4; j++, s += 32, dst += dstpitch)
{
@@ -738,7 +738,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -765,7 +765,7 @@ public:
#else
const DWORD* s = &columnTable16[0][0];
const BYTE* s = &columnTable16[0][0];
for(int j = 0; j < 8; j++, s += 16, dst += dstpitch)
{
@@ -792,7 +792,7 @@ public:
#else
const DWORD* s = &columnTable8[0][0];
const BYTE* s = &columnTable8[0][0];
for(int j = 0; j < 16; j++, s += 16, dst += dstpitch)
{
@@ -819,7 +819,7 @@ public:
#else
const DWORD* s = &columnTable4[0][0];
const WORD* s = &columnTable4[0][0];
for(int j = 0; j < 16; j++, s += 32, dst += dstpitch)
{
@@ -935,7 +935,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -980,7 +980,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -1023,7 +1023,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -1624,7 +1624,7 @@ public:
#else
const DWORD* d = &columnTable32[0][0];
const BYTE* d = &columnTable32[0][0];
for(int j = 0, diff = srcpitch - 8 * 3; j < 8; j++, src += diff, d += 8)
{
@@ -1689,7 +1689,7 @@ public:
#else
const DWORD* d = &columnTable32[0][0];
const BYTE* d = &columnTable32[0][0];
for(int j = 0; j < 8; j++, src += srcpitch, dst += 8)
{
@@ -1816,7 +1816,7 @@ public:
#else
const DWORD* d = &columnTable32[0][0];
const BYTE* d = &columnTable32[0][0];
for(int j = 0; j < 8; j++, d += 8, src += srcpitch)
{
@@ -1944,7 +1944,7 @@ public:
#else
const DWORD* d = &columnTable32[0][0];
const BYTE* d = &columnTable32[0][0];
for(int j = 0; j < 8; j++, d += 8, src += srcpitch)
{
@@ -2004,7 +2004,7 @@ public:
DWORD TA0 = TEXA.TA0 << 24;
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -2082,7 +2082,7 @@ public:
#else
const DWORD* s = &columnTable8[0][0];
const BYTE* s = &columnTable8[0][0];
for(int j = 0; j < 16; j++, s += 16, dst += dstpitch)
{
@@ -2169,14 +2169,14 @@ public:
#else
const DWORD* s = &columnTable4[0][0];
const WORD* s = &columnTable4[0][0];
for(int j = 0; j < 16; j++, s += 32, dst += dstpitch)
{
for(int i = 0; i < 16; i++)
{
DWORD a0 = s[i * 2 + 0];
DWORD a1 = s[i * 2 + 1];
BYTE a0 = s[i * 2 + 0];
BYTE a1 = s[i * 2 + 1];
BYTE c0 = (src[a0 >> 1] >> ((a0 & 1) << 2)) & 0x0f;
BYTE c1 = (src[a1 >> 1] >> ((a1 & 1) << 2)) & 0x0f;
@@ -2228,7 +2228,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -2281,7 +2281,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{
@@ -2334,7 +2334,7 @@ public:
#else
const DWORD* s = &columnTable32[0][0];
const BYTE* s = &columnTable32[0][0];
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
{

File diff suppressed because it is too large Load Diff

View File

@@ -100,29 +100,29 @@ __declspec(align(16)) struct GSScanlineParam
const DWORD* clut;
DWORD tw;
GSLocalMemory::Offset* fbo;
GSLocalMemory::Offset* zbo;
DWORD fm, zm;
};
class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
{
GSScanlineEnvironment m_env;
struct Offset
struct ActiveDrawScanlinePtr
{
GSVector4i row[1024];
int* col[4];
DWORD hash;
UINT64 frame;
UINT64 frames;
__int64 ticks;
__int64 pixels;
DrawScanlinePtr dsf;
};
CRBMapC<DWORD, Offset*> m_omap;
Offset* m_fbo;
Offset* m_zbo;
GSScanlineEnvironment m_env;
void SetupOffset(Offset*& co, DWORD bp, DWORD bw, DWORD psm);
void FreeOffsets();
DrawScanlinePtr m_ds[4][4][4][2], m_dsf;
CRBMap<DWORD, DrawScanlinePtr> m_dsmap, m_dsmap2;
DrawScanlinePtr m_ds[4][4][4][2];
CRBMap<DWORD, DrawScanlinePtr> m_dsmap;
CRBMap<DWORD, ActiveDrawScanlinePtr*> m_dsmap_active;
ActiveDrawScanlinePtr* m_dsf;
void Init();
@@ -153,18 +153,31 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
__forceinline GSVector4i ReadZBufX(int psm, const GSVector4i& addr) const;
__forceinline void WriteFrameAndZBufX(int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f, int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z, int pixels);
template<class T, bool masked>
void DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, DWORD c, DWORD m);
template<class T, bool masked>
__forceinline void FillRect(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
template<class T, bool masked>
__forceinline void FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
protected:
GSState* m_state;
int m_id;
public:
GSDrawScanline(GSState* state);
GSDrawScanline(GSState* state, int id);
virtual ~GSDrawScanline();
// IDrawScanline
bool SetupDraw(const GSRasterizerData* data);
bool BeginDraw(const GSRasterizerData* data);
void EndDraw(const GSRasterizerStats& stats);
void SetupPrim(GS_PRIM_CLASS primclass, const GSVertexSW* vertices, const GSVertexSW& dscan);
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
DrawScanlinePtr GetDrawScanlinePtr();
void PrintStats();
};

View File

@@ -386,6 +386,67 @@ GSLocalMemory::GSLocalMemory()
// Releases the m_vm8 virtual allocation and every Offset cached in m_omap
// by GetOffset().
GSLocalMemory::~GSLocalMemory()
{
	VirtualFree(m_vm8, 0, MEM_RELEASE);

	POSITION pos = m_omap.GetHeadPosition();

	while(pos)
	{
		Offset* o = m_omap.GetNextValue(pos);

		// GetOffset() carves all four column tables out of ONE _aligned_malloc'ed
		// buffer whose base address is col[0]; col[1..3] point into that same
		// buffer. So the buffer is released exactly once, through col[0].
		// (Passing o->col itself would hand _aligned_free the address of the
		// pointer array embedded in *o, which was never separately allocated.)
		_aligned_free(o->col[0]);

		_aligned_free(o);
	}

	m_omap.RemoveAll();
}
// Returns the offset table for the buffer described by (bp, bw, psm),
// building it and caching it in m_omap the first time that combination is seen.
//
// bp, bw, psm - buffer parameters; combined into the cache key as
//               bp | (bw << 14) | (psm << 20)
// o           - optional result of a previous call; it is returned as-is when
//               its hash already matches, which skips the map lookup
//
// Returns NULL (after asserting) when bw is 0. The returned table stays owned
// by m_omap and is released in the destructor; callers must not free it.
GSLocalMemory::Offset* GSLocalMemory::GetOffset(DWORD bp, DWORD bw, DWORD psm, Offset* o)
{
	if(bw == 0) {ASSERT(0); return NULL;}

	ASSERT(m_psm[psm].trbpp > 8); // only for 16/24/32/8h/4hh/4hl formats where all columns are the same

	DWORD hash = bp | (bw << 14) | (psm << 20);

	if(!o || o->hash != hash)
	{
		CRBMap<DWORD, Offset*>::CPair* pair = m_omap.Lookup(hash);

		if(pair)
		{
			o = pair->m_value;
		}
		else
		{
			o = (Offset*)_aligned_malloc(sizeof(Offset), 16);

			o->hash = hash;

			pixelAddress pa = m_psm[psm].pa;

			for(int i = 0, j = 2048; i < j; i++)
			{
				o->row[i] = GSVector4i((int)pa(0, i, bp, bw));
			}

			// One 16-byte aligned buffer holds all four column tables. Table i
			// starts (2048 + 3) * i ints into it, which gives the start offsets
			// 0, 3, 2, 1 (mod 4): col[i] + x is 16-byte aligned whenever
			// (x & 3) == i. With this stride the four 2048-entry tables do not
			// overlap and exactly fit the (2048 + 3) * 4 ints allocated here.
			// col[0] is the base of the buffer; the destructor relies on that.
			int* p = (int*)_aligned_malloc(sizeof(int) * (2048 + 3) * 4, 16);

			for(int i = 0; i < 4; i++)
			{
				o->col[i] = &p[(2048 + 3) * i];

				memcpy(o->col[i], m_psm[psm].rowOffset[0], sizeof(int) * 2048);
			}

			m_omap.SetAt(hash, o);
		}
	}

	return o;
}
bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp, DWORD bw)

View File

@@ -75,6 +75,13 @@ public:
GSClut m_clut;
// Precomputed address offsets for one (bp, bw, psm) buffer; instances are
// built and cached by GSLocalMemory::GetOffset().
struct Offset
{
// One entry per row y, filled by GetOffset() from pa(0, y, bp, bw).
GSVector4i row[2048];
// Four pointers into a single buffer allocated by GetOffset(); each addresses
// a 2048-int copy of the pixel format's rowOffset[0] table.
int* col[4];
// Cache key: bp | (bw << 14) | (psm << 20).
DWORD hash;
};
protected:
static DWORD pageOffset32[32][32][64];
static DWORD pageOffset32Z[32][32][64];
@@ -108,10 +115,16 @@ protected:
friend class GSClut;
//
CRBMapC<DWORD, Offset*> m_omap;
public:
GSLocalMemory();
virtual ~GSLocalMemory();
Offset* GetOffset(DWORD bp, DWORD bw, DWORD psm, Offset* o = NULL);
// address
static DWORD PageNumber32(int x, int y, DWORD bp, DWORD bw)

View File

@@ -26,7 +26,6 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
: m_ds(ds)
, m_id(id)
, m_threads(threads)
, m_pixels(0)
{
}
@@ -35,42 +34,51 @@ GSRasterizer::~GSRasterizer()
delete m_ds;
}
int GSRasterizer::Draw(const GSRasterizerData* data)
void GSRasterizer::Draw(const GSRasterizerData* data)
{
bool solid = m_ds->SetupDraw(data);
bool solid = m_ds->BeginDraw(data);
const GSVector4i scissor = data->scissor;
const GSVertexSW* vertices = data->vertices;
const int count = data->count;
int prims = 0;
m_stats.Reset();
__int64 start = __rdtsc();
switch(data->primclass)
{
case GS_POINT_CLASS:
prims = count;
m_stats.prims = count;
for(int i = 0; i < count; i++) DrawPoint(&vertices[i], scissor);
break;
case GS_LINE_CLASS:
ASSERT(!(count & 1));
prims = count / 2;
m_stats.prims = count / 2;
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i], scissor);
break;
case GS_TRIANGLE_CLASS:
ASSERT(!(count % 3));
prims = count / 3;
m_stats.prims = count / 3;
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i], scissor);
break;
case GS_SPRITE_CLASS:
ASSERT(!(count & 1));
prims = count / 2;
m_stats.prims = count / 2;
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i], scissor, solid);
break;
default:
__assume(0);
}
return prims;
m_stats.ticks = __rdtsc() - start;
m_ds->EndDraw(m_stats);
}
// IRasterizer: copies this rasterizer's current statistics (m_stats) into stats.
void GSRasterizer::GetStats(GSRasterizerStats& stats)
{
stats = m_stats;
}
void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
@@ -85,7 +93,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
{
m_ds->DrawScanline(p.y, p.x, p.x + 1, *v);
m_pixels++;
m_stats.pixels++;
}
}
}
@@ -224,7 +232,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
if(top < scissor.y) top = scissor.y;
if(bottom > scissor.w) bottom = scissor.w;
if(top < bottom)
{
float py = (float)top - l.p.y;
@@ -316,7 +324,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
GSVector4 dr;
bool b = (longest.p > GSVector4::zero()).mask() & 1;
if(b)
{
dl = v01 / v01.p.yyyy();
@@ -428,7 +436,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
if(pixels > 0)
{
m_pixels += pixels;
m_stats.pixels += pixels;
GSVertexSW scan = l;
@@ -496,7 +504,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
{
m_ds->DrawSolidRect(r, scan);
m_pixels += (r.z - r.x) * (r.w - r.y);
m_stats.pixels += (r.z - r.x) * (r.w - r.y);
}
return;
@@ -527,7 +535,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
{
(m_ds->*dsf)(top, left, right, scan);
m_pixels += right - left;
m_stats.pixels += right - left;
}
}
}
@@ -563,13 +571,11 @@ GSRasterizerMT::~GSRasterizerMT()
}
}
int GSRasterizerMT::Draw(const GSRasterizerData* data)
void GSRasterizerMT::Draw(const GSRasterizerData* data)
{
int prims = 0;
if(m_id == 0)
{
prims = __super::Draw(data);
__super::Draw(data);
}
else
{
@@ -577,8 +583,6 @@ int GSRasterizerMT::Draw(const GSRasterizerData* data)
InterlockedBitTestAndSet(m_sync, m_id);
}
return prims;
}
DWORD WINAPI GSRasterizerMT::StaticThreadProc(LPVOID lpParam)
@@ -631,17 +635,19 @@ void GSRasterizerList::FreeRasterizers()
}
}
int GSRasterizerList::Draw(const GSRasterizerData* data)
void GSRasterizerList::Draw(const GSRasterizerData* data)
{
*m_sync = 0;
int prims = 0;
m_stats.Reset();
__int64 start = __rdtsc();
POSITION pos = GetTailPosition();
while(pos)
{
prims += GetPrev(pos)->Draw(data);
GetPrev(pos)->Draw(data);
}
while(*m_sync)
@@ -649,19 +655,22 @@ int GSRasterizerList::Draw(const GSRasterizerData* data)
_mm_pause();
}
return prims;
}
m_stats.ticks = __rdtsc() - start;
int GSRasterizerList::GetPixels()
{
int pixels = 0;
POSITION pos = GetHeadPosition();
pos = GetHeadPosition();
while(pos)
{
pixels += GetNext(pos)->GetPixels();
}
GSRasterizerStats s;
return pixels;
GetNext(pos)->GetStats(s);
m_stats.pixels += s.pixels;
m_stats.prims = max(m_stats.prims, s.prims);
}
}
// IRasterizer: copies the list's own statistics (m_stats) into stats.
void GSRasterizerList::GetStats(GSRasterizerStats& stats)
{
stats = m_stats;
}

View File

@@ -24,6 +24,7 @@
#include "GS.h"
#include "GSVertexSW.h"
//
#define FAST_DRAWSCANLINE
__declspec(align(16)) class GSRasterizerData
@@ -36,11 +37,21 @@ public:
const void* param;
};
// Counters a rasterizer gathers for a draw call.
struct GSRasterizerStats
{
	__int64 ticks; // tick count stored by the rasterizer (an __rdtsc() delta)
	int prims;
	int pixels;

	// Starts with every counter at zero.
	GSRasterizerStats() : ticks(0), prims(0), pixels(0) {}

	// Sets every counter back to zero.
	void Reset()
	{
		ticks = 0;
		prims = 0;
		pixels = 0;
	}
};
class IRasterizer
{
public:
virtual int Draw(const GSRasterizerData* data) = 0;
virtual int GetPixels() = 0;
virtual void Draw(const GSRasterizerData* data) = 0;
virtual void GetStats(GSRasterizerStats& stats) = 0;
};
class IDrawScanline
@@ -50,11 +61,14 @@ public:
virtual ~IDrawScanline() {}
virtual bool SetupDraw(const GSRasterizerData* data) = 0;
virtual bool BeginDraw(const GSRasterizerData* data) = 0;
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
virtual void SetupPrim(GS_PRIM_CLASS primclass, const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
virtual void DrawScanline(int top, int left, int right, const GSVertexSW& v) = 0;
virtual void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) = 0;
virtual DrawScanlinePtr GetDrawScanlinePtr() = 0;
virtual void PrintStats() = 0;
};
class GSRasterizer : public IRasterizer
@@ -63,7 +77,7 @@ protected:
IDrawScanline* m_ds;
int m_id;
int m_threads;
int m_pixels;
GSRasterizerStats m_stats;
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
@@ -82,8 +96,10 @@ public:
// IRasterizer
int Draw(const GSRasterizerData* data);
int GetPixels() {int pixels = m_pixels; m_pixels = 0; return pixels;}
void Draw(const GSRasterizerData* data);
void GetStats(GSRasterizerStats& stats);
void PrintStats() {m_ds->PrintStats();}
};
class GSRasterizerMT : public GSRasterizer
@@ -104,12 +120,13 @@ public:
// IRasterizer
int Draw(const GSRasterizerData* data);
void Draw(const GSRasterizerData* data);
};
class GSRasterizerList : public CAtlList<GSRasterizerMT*>, public IRasterizer
{
long* m_sync;
GSRasterizerStats m_stats;
void FreeRasterizers();
@@ -125,12 +142,14 @@ public:
for(int i = 0; i < threads; i++)
{
AddTail(new GSRasterizerMT(new DS(parent), i, threads, m_sync));
AddTail(new GSRasterizerMT(new DS(parent, i), i, threads, m_sync));
}
}
// IRasterizer
int Draw(const GSRasterizerData* data);
int GetPixels();
void Draw(const GSRasterizerData* data);
void GetStats(GSRasterizerStats& stats);
void PrintStats() {GetHead()->PrintStats();}
};

View File

@@ -35,6 +35,8 @@ protected:
GSTextureCacheSW* m_tc;
Texture m_texture[2];
bool m_reset;
GSLocalMemory::Offset* m_fbo;
GSLocalMemory::Offset* m_zbo;
__declspec(align(16)) struct VertexTrace
{
@@ -83,6 +85,8 @@ protected:
m_reset = false;
}
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
}
void ResetDevice()
@@ -273,6 +277,12 @@ protected:
p.vm = m_mem.m_vm32;
m_fbo = m_mem.GetOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM, m_fbo);
m_zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM, m_zbo);
p.fbo = m_fbo;
p.zbo = m_zbo;
p.sel.dw = 0;
p.sel.fpsm = 3;
@@ -574,19 +584,19 @@ __int64 start = __rdtsc();
data.count = m_count;
data.param = &p;
int prims = m_rl.Draw(&data);
m_perfmon.Put(GSPerfMon::Prim, prims);
m_rl.Draw(&data);
GSRasterizerStats stats;
m_rl.GetStats(stats);
m_perfmon.Put(GSPerfMon::Draw, 1);
int pixels = m_rl.GetPixels();
m_perfmon.Put(GSPerfMon::Fillrate, pixels);
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
/*
__int64 diff = __rdtsc() - start;
s_total += diff;
//if(pixels > 50000)
if(pixels >= 50000)
fprintf(s_fp, "[%I64d, %d, %d, %d] %08x, diff = %I64d /prim = %I64d /pixel = %I64d \n", frame, PRIM->PRIM, prims, pixels, p.sel, diff, diff / prims, pixels > 0 ? diff / pixels : 0);
*/
// TODO
@@ -636,7 +646,7 @@ fprintf(s_fp, "[%I64d, %d, %d, %d] %08x, diff = %I64d /prim = %I64d /pixel = %I6
if(s_dump)
{
CString str;
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy);
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
@@ -651,6 +661,8 @@ fprintf(s_fp, "[%I64d, %d, %d, %d] %08x, diff = %I64d /prim = %I64d /pixel = %I6
public:
GSRendererSW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, int threads)
: GSRendererT(base, mt, irq, nloophack, rs)
, m_fbo(NULL)
, m_zbo(NULL)
{
m_rl.Create<GSDrawScanline>(this, threads);

View File

@@ -416,12 +416,7 @@ void GSState::GIFPackedRegHandlerSTQ(GIFPackedReg* r)
void GSState::GIFPackedRegHandlerUV(GIFPackedReg* r)
{
#if _M_SSE >= 0x401
GSVector4i v = GSVector4i::loadl(r);
m_v.UV.ai32[0] = (UINT32)GSVector4i::store(v.pu32(v)) & 0x3fff3fff;
#elif _M_SSE >= 0x200
#if _M_SSE >= 0x200
GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
m_v.UV.ai32[0] = (UINT32)GSVector4i::store(v.ps32(v));

View File

@@ -22,21 +22,21 @@
#include "StdAfx.h"
#include "GSTables.h"
const DWORD blockTable32[4][8] = {
const BYTE blockTable32[4][8] = {
{ 0, 1, 4, 5, 16, 17, 20, 21},
{ 2, 3, 6, 7, 18, 19, 22, 23},
{ 8, 9, 12, 13, 24, 25, 28, 29},
{ 10, 11, 14, 15, 26, 27, 30, 31}
};
const DWORD blockTable32Z[4][8] = {
const BYTE blockTable32Z[4][8] = {
{ 24, 25, 28, 29, 8, 9, 12, 13},
{ 26, 27, 30, 31, 10, 11, 14, 15},
{ 16, 17, 20, 21, 0, 1, 4, 5},
{ 18, 19, 22, 23, 2, 3, 6, 7}
};
const DWORD blockTable16[8][4] = {
const BYTE blockTable16[8][4] = {
{ 0, 2, 8, 10 },
{ 1, 3, 9, 11 },
{ 4, 6, 12, 14 },
@@ -47,7 +47,7 @@ const DWORD blockTable16[8][4] = {
{ 21, 23, 29, 31 }
};
const DWORD blockTable16S[8][4] = {
const BYTE blockTable16S[8][4] = {
{ 0, 2, 16, 18 },
{ 1, 3, 17, 19 },
{ 8, 10, 24, 26 },
@@ -58,7 +58,7 @@ const DWORD blockTable16S[8][4] = {
{ 13, 15, 29, 31 }
};
const DWORD blockTable16Z[8][4] = {
const BYTE blockTable16Z[8][4] = {
{ 24, 26, 16, 18 },
{ 25, 27, 17, 19 },
{ 28, 30, 20, 22 },
@@ -69,7 +69,7 @@ const DWORD blockTable16Z[8][4] = {
{ 13, 15, 5, 7 }
};
const DWORD blockTable16SZ[8][4] = {
const BYTE blockTable16SZ[8][4] = {
{ 24, 26, 8, 10 },
{ 25, 27, 9, 11 },
{ 16, 18, 0, 2 },
@@ -80,14 +80,14 @@ const DWORD blockTable16SZ[8][4] = {
{ 21, 23, 5, 7 }
};
const DWORD blockTable8[4][8] = {
const BYTE blockTable8[4][8] = {
{ 0, 1, 4, 5, 16, 17, 20, 21},
{ 2, 3, 6, 7, 18, 19, 22, 23},
{ 8, 9, 12, 13, 24, 25, 28, 29},
{ 10, 11, 14, 15, 26, 27, 30, 31}
};
const DWORD blockTable4[8][4] = {
const BYTE blockTable4[8][4] = {
{ 0, 2, 8, 10 },
{ 1, 3, 9, 11 },
{ 4, 6, 12, 14 },
@@ -98,7 +98,7 @@ const DWORD blockTable4[8][4] = {
{ 21, 23, 29, 31 }
};
const DWORD columnTable32[8][8] = {
const BYTE columnTable32[8][8] = {
{ 0, 1, 4, 5, 8, 9, 12, 13 },
{ 2, 3, 6, 7, 10, 11, 14, 15 },
{ 16, 17, 20, 21, 24, 25, 28, 29 },
@@ -109,7 +109,7 @@ const DWORD columnTable32[8][8] = {
{ 50, 51, 54, 55, 58, 59, 62, 63 },
};
const DWORD columnTable16[8][16] = {
const BYTE columnTable16[8][16] = {
{ 0, 2, 8, 10, 16, 18, 24, 26,
1, 3, 9, 11, 17, 19, 25, 27 },
{ 4, 6, 12, 14, 20, 22, 28, 30,
@@ -128,7 +128,7 @@ const DWORD columnTable16[8][16] = {
101, 103, 109, 111, 117, 119, 125, 127 },
};
const DWORD columnTable8[16][16] = {
const BYTE columnTable8[16][16] = {
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
2, 6, 18, 22, 34, 38, 50, 54 },
{ 8, 12, 24, 28, 40, 44, 56, 60,
@@ -163,7 +163,7 @@ const DWORD columnTable8[16][16] = {
203, 207, 219, 223, 235, 239, 251, 255 },
};
const DWORD columnTable4[16][32] = {
const WORD columnTable4[16][32] = {
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
2, 10, 34, 42, 66, 74, 98, 106,
4, 12, 36, 44, 68, 76, 100, 108,
@@ -230,7 +230,7 @@ const DWORD columnTable4[16][32] = {
407, 415, 439, 447, 471, 479, 503, 511 },
};
const DWORD clutTableT32I8[128] =
const BYTE clutTableT32I8[128] =
{
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79,
@@ -242,13 +242,13 @@ const DWORD clutTableT32I8[128] =
112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127
};
const DWORD clutTableT32I4[16] =
const BYTE clutTableT32I4[16] =
{
0, 1, 4, 5, 8, 9, 12, 13,
2, 3, 6, 7, 10, 11, 14, 15
};
const DWORD clutTableT16I8[32] =
const BYTE clutTableT16I8[32] =
{
0, 2, 8, 10, 16, 18, 24, 26,
4, 6, 12, 14, 20, 22, 28, 30,
@@ -256,7 +256,7 @@ const DWORD clutTableT16I8[32] =
5, 7, 13, 15, 21, 23, 29, 31
};
const DWORD clutTableT16I4[16] =
const BYTE clutTableT16I4[16] =
{
0, 2, 8, 10, 16, 18, 24, 26,
4, 6, 12, 14, 20, 22, 28, 30

View File

@@ -21,19 +21,19 @@
#pragma once
extern const DWORD blockTable32[4][8];
extern const DWORD blockTable32Z[4][8];
extern const DWORD blockTable16[8][4];
extern const DWORD blockTable16S[8][4];
extern const DWORD blockTable16Z[8][4];
extern const DWORD blockTable16SZ[8][4];
extern const DWORD blockTable8[4][8];
extern const DWORD blockTable4[8][4];
extern const DWORD columnTable32[8][8];
extern const DWORD columnTable16[8][16];
extern const DWORD columnTable8[16][16];
extern const DWORD columnTable4[16][32];
extern const DWORD clutTableT32I8[128];
extern const DWORD clutTableT32I4[16];
extern const DWORD clutTableT16I8[32];
extern const DWORD clutTableT16I4[16];
extern const BYTE blockTable32[4][8];
extern const BYTE blockTable32Z[4][8];
extern const BYTE blockTable16[8][4];
extern const BYTE blockTable16S[8][4];
extern const BYTE blockTable16Z[8][4];
extern const BYTE blockTable16SZ[8][4];
extern const BYTE blockTable8[4][8];
extern const BYTE blockTable4[8][4];
extern const BYTE columnTable32[8][8];
extern const BYTE columnTable16[8][16];
extern const BYTE columnTable8[16][16];
extern const WORD columnTable4[16][32];
extern const BYTE clutTableT32I8[128];
extern const BYTE clutTableT32I4[16];
extern const BYTE clutTableT16I8[32];
extern const BYTE clutTableT16I4[16];