mirror of
https://github.com/PCSX2/gsdx-sourceforge.git
synced 2026-02-04 03:11:19 +01:00
This commit is contained in:
@@ -22,8 +22,9 @@
|
||||
#include "StdAfx.h"
|
||||
#include "GPUDrawScanline.h"
|
||||
|
||||
GPUDrawScanline::GPUDrawScanline(GPUState* state)
|
||||
GPUDrawScanline::GPUDrawScanline(GPUState* state, int id)
|
||||
: m_state(state)
|
||||
, m_id(id)
|
||||
{
|
||||
Init();
|
||||
}
|
||||
@@ -34,7 +35,7 @@ GPUDrawScanline::~GPUDrawScanline()
|
||||
|
||||
// IDrawScanline
|
||||
|
||||
bool GPUDrawScanline::SetupDraw(const GSRasterizerData* data)
|
||||
bool GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||
{
|
||||
GPUDrawingEnvironment& env = m_state->m_env;
|
||||
|
||||
|
||||
@@ -101,18 +101,20 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
||||
|
||||
protected:
|
||||
GPUState* m_state;
|
||||
int m_filter;
|
||||
int m_dither;
|
||||
int m_id;
|
||||
|
||||
public:
|
||||
GPUDrawScanline(GPUState* state);
|
||||
GPUDrawScanline(GPUState* state, int id);
|
||||
virtual ~GPUDrawScanline();
|
||||
|
||||
// IDrawScanline
|
||||
|
||||
bool SetupDraw(const GSRasterizerData* data);
|
||||
bool BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(const GSRasterizerStats& stats) {}
|
||||
void SetupPrim(GS_PRIM_CLASS primclass, const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
DrawScanlinePtr GetDrawScanlinePtr();
|
||||
|
||||
void PrintStats() {}
|
||||
};
|
||||
|
||||
@@ -164,14 +164,15 @@ protected:
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
int prims = m_rl.Draw(&data);
|
||||
m_rl.Draw(&data);
|
||||
|
||||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Prim, prims);
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
|
||||
int pixels = m_rl.GetPixels();
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, pixels);
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
|
||||
// TODO
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable32[(i & 3) << 1][0];
|
||||
const BYTE* d = &columnTable32[(i & 3) << 1][0];
|
||||
|
||||
for(int j = 0; j < 2; j++, d += 8, src += srcpitch)
|
||||
{
|
||||
@@ -138,7 +138,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable16[(i & 3) << 1][0];
|
||||
const BYTE* d = &columnTable16[(i & 3) << 1][0];
|
||||
|
||||
for(int j = 0; j < 2; j++, d += 16, src += srcpitch)
|
||||
{
|
||||
@@ -182,7 +182,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable8[(i & 3) << 2][0];
|
||||
const BYTE* d = &columnTable8[(i & 3) << 2][0];
|
||||
|
||||
for(int j = 0; j < 4; j++, d += 16, src += srcpitch)
|
||||
{
|
||||
@@ -229,7 +229,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable4[(i & 3) << 2][0];
|
||||
const WORD* d = &columnTable4[(i & 3) << 2][0];
|
||||
|
||||
for(int j = 0; j < 4; j++, d += 32, src += srcpitch)
|
||||
{
|
||||
@@ -307,7 +307,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable32[0][0];
|
||||
const BYTE* d = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
||||
{
|
||||
@@ -341,7 +341,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable16[0][0];
|
||||
const BYTE* d = &columnTable16[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, d += 16, src += srcpitch)
|
||||
{
|
||||
@@ -368,7 +368,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable8[0][0];
|
||||
const BYTE* d = &columnTable8[0][0];
|
||||
|
||||
for(int j = 0; j < 16; j++, d += 16, src += srcpitch)
|
||||
{
|
||||
@@ -395,7 +395,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable4[0][0];
|
||||
const WORD* d = &columnTable4[0][0];
|
||||
|
||||
for(int j = 0; j < 16; j++, d += 32, src += srcpitch)
|
||||
{
|
||||
@@ -434,7 +434,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[(i & 3) << 1][0];
|
||||
const BYTE* s = &columnTable32[(i & 3) << 1][0];
|
||||
|
||||
for(int j = 0; j < 2; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -492,7 +492,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable16[(i & 3) << 1][0];
|
||||
const BYTE* s = &columnTable16[(i & 3) << 1][0];
|
||||
|
||||
for(int j = 0; j < 2; j++, s += 16, dst += dstpitch)
|
||||
{
|
||||
@@ -573,7 +573,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable8[(i & 3) << 2][0];
|
||||
const BYTE* s = &columnTable8[(i & 3) << 2][0];
|
||||
|
||||
for(int j = 0; j < 4; j++, s += 16, dst += dstpitch)
|
||||
{
|
||||
@@ -660,7 +660,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable4[(i & 3) << 2][0];
|
||||
const WORD* s = &columnTable4[(i & 3) << 2][0];
|
||||
|
||||
for(int j = 0; j < 4; j++, s += 32, dst += dstpitch)
|
||||
{
|
||||
@@ -738,7 +738,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -765,7 +765,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable16[0][0];
|
||||
const BYTE* s = &columnTable16[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 16, dst += dstpitch)
|
||||
{
|
||||
@@ -792,7 +792,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable8[0][0];
|
||||
const BYTE* s = &columnTable8[0][0];
|
||||
|
||||
for(int j = 0; j < 16; j++, s += 16, dst += dstpitch)
|
||||
{
|
||||
@@ -819,7 +819,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable4[0][0];
|
||||
const WORD* s = &columnTable4[0][0];
|
||||
|
||||
for(int j = 0; j < 16; j++, s += 32, dst += dstpitch)
|
||||
{
|
||||
@@ -935,7 +935,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -980,7 +980,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -1023,7 +1023,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -1624,7 +1624,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable32[0][0];
|
||||
const BYTE* d = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0, diff = srcpitch - 8 * 3; j < 8; j++, src += diff, d += 8)
|
||||
{
|
||||
@@ -1689,7 +1689,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable32[0][0];
|
||||
const BYTE* d = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, src += srcpitch, dst += 8)
|
||||
{
|
||||
@@ -1816,7 +1816,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable32[0][0];
|
||||
const BYTE* d = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
||||
{
|
||||
@@ -1944,7 +1944,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* d = &columnTable32[0][0];
|
||||
const BYTE* d = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
||||
{
|
||||
@@ -2004,7 +2004,7 @@ public:
|
||||
|
||||
DWORD TA0 = TEXA.TA0 << 24;
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -2082,7 +2082,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable8[0][0];
|
||||
const BYTE* s = &columnTable8[0][0];
|
||||
|
||||
for(int j = 0; j < 16; j++, s += 16, dst += dstpitch)
|
||||
{
|
||||
@@ -2169,14 +2169,14 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable4[0][0];
|
||||
const WORD* s = &columnTable4[0][0];
|
||||
|
||||
for(int j = 0; j < 16; j++, s += 32, dst += dstpitch)
|
||||
{
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
DWORD a0 = s[i * 2 + 0];
|
||||
DWORD a1 = s[i * 2 + 1];
|
||||
BYTE a0 = s[i * 2 + 0];
|
||||
BYTE a1 = s[i * 2 + 1];
|
||||
|
||||
BYTE c0 = (src[a0 >> 1] >> ((a0 & 1) << 2)) & 0x0f;
|
||||
BYTE c1 = (src[a1 >> 1] >> ((a1 & 1) << 2)) & 0x0f;
|
||||
@@ -2228,7 +2228,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -2281,7 +2281,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
@@ -2334,7 +2334,7 @@ public:
|
||||
|
||||
#else
|
||||
|
||||
const DWORD* s = &columnTable32[0][0];
|
||||
const BYTE* s = &columnTable32[0][0];
|
||||
|
||||
for(int j = 0; j < 8; j++, s += 8, dst += dstpitch)
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -100,29 +100,29 @@ __declspec(align(16)) struct GSScanlineParam
|
||||
const DWORD* clut;
|
||||
DWORD tw;
|
||||
|
||||
GSLocalMemory::Offset* fbo;
|
||||
GSLocalMemory::Offset* zbo;
|
||||
|
||||
DWORD fm, zm;
|
||||
};
|
||||
|
||||
class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
||||
{
|
||||
GSScanlineEnvironment m_env;
|
||||
|
||||
struct Offset
|
||||
struct ActiveDrawScanlinePtr
|
||||
{
|
||||
GSVector4i row[1024];
|
||||
int* col[4];
|
||||
DWORD hash;
|
||||
UINT64 frame;
|
||||
UINT64 frames;
|
||||
__int64 ticks;
|
||||
__int64 pixels;
|
||||
DrawScanlinePtr dsf;
|
||||
};
|
||||
|
||||
CRBMapC<DWORD, Offset*> m_omap;
|
||||
Offset* m_fbo;
|
||||
Offset* m_zbo;
|
||||
GSScanlineEnvironment m_env;
|
||||
|
||||
void SetupOffset(Offset*& co, DWORD bp, DWORD bw, DWORD psm);
|
||||
void FreeOffsets();
|
||||
|
||||
DrawScanlinePtr m_ds[4][4][4][2], m_dsf;
|
||||
CRBMap<DWORD, DrawScanlinePtr> m_dsmap, m_dsmap2;
|
||||
DrawScanlinePtr m_ds[4][4][4][2];
|
||||
CRBMap<DWORD, DrawScanlinePtr> m_dsmap;
|
||||
CRBMap<DWORD, ActiveDrawScanlinePtr*> m_dsmap_active;
|
||||
ActiveDrawScanlinePtr* m_dsf;
|
||||
|
||||
void Init();
|
||||
|
||||
@@ -153,18 +153,31 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
||||
__forceinline GSVector4i ReadZBufX(int psm, const GSVector4i& addr) const;
|
||||
__forceinline void WriteFrameAndZBufX(int fpsm, const GSVector4i& fa, const GSVector4i& fm, const GSVector4i& f, int zpsm, const GSVector4i& za, const GSVector4i& zm, const GSVector4i& z, int pixels);
|
||||
|
||||
template<class T, bool masked>
|
||||
void DrawSolidRectT(const GSVector4i* row, int* col, const GSVector4i& r, DWORD c, DWORD m);
|
||||
|
||||
template<class T, bool masked>
|
||||
__forceinline void FillRect(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
||||
|
||||
template<class T, bool masked>
|
||||
__forceinline void FillBlock(const GSVector4i* row, int* col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
||||
|
||||
protected:
|
||||
GSState* m_state;
|
||||
int m_id;
|
||||
|
||||
public:
|
||||
GSDrawScanline(GSState* state);
|
||||
GSDrawScanline(GSState* state, int id);
|
||||
virtual ~GSDrawScanline();
|
||||
|
||||
// IDrawScanline
|
||||
|
||||
bool SetupDraw(const GSRasterizerData* data);
|
||||
bool BeginDraw(const GSRasterizerData* data);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void SetupPrim(GS_PRIM_CLASS primclass, const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
DrawScanlinePtr GetDrawScanlinePtr();
|
||||
|
||||
void PrintStats();
|
||||
};
|
||||
|
||||
@@ -386,6 +386,67 @@ GSLocalMemory::GSLocalMemory()
|
||||
// Releases the emulated video memory and every Offset cached by GetOffset().
GSLocalMemory::~GSLocalMemory()
{
	VirtualFree(m_vm8, 0, MEM_RELEASE);

	POSITION pos = m_omap.GetHeadPosition();

	while(pos)
	{
		Offset* o = m_omap.GetNextValue(pos);

		// GetOffset() carves all four col[] tables out of ONE _aligned_malloc'd
		// buffer, and col[0] is the base address of that buffer (its alignment
		// shift is 0). So exactly one free, of col[0], is required. Freeing
		// o->col itself (the array member inside *o) or col[1..3] (interior
		// pointers) would hand the allocator an address it never returned.
		_aligned_free(o->col[0]);

		_aligned_free(o);
	}

	m_omap.RemoveAll();
}
|
||||
|
||||
// Returns the cached address tables for the buffer described by bp/bw/psm,
// building them and adding them to m_omap on first use.
//
// bp, bw, psm - buffer parameters; packed into the cache key as
//               bp | (bw << 14) | (psm << 20).
// o           - optional hint, normally the value returned by the previous
//               call. When its hash still matches, it is returned directly
//               without a map lookup.
//
// Returns NULL (after asserting) when bw is 0. The returned object stays owned
// by m_omap and is released in ~GSLocalMemory(); callers must not free it.
GSLocalMemory::Offset* GSLocalMemory::GetOffset(DWORD bp, DWORD bw, DWORD psm, Offset* o)
{
	if(bw == 0) {ASSERT(0); return NULL;}

	ASSERT(m_psm[psm].trbpp > 8); // only for 16/24/32/8h/4hh/4hl formats where all columns are the same

	DWORD hash = bp | (bw << 14) | (psm << 20);

	// Fast path: the caller's hint already describes this buffer.
	if(o && o->hash == hash)
	{
		return o;
	}

	CRBMap<DWORD, Offset*>::CPair* pair = m_omap.Lookup(hash);

	if(pair)
	{
		return pair->m_value;
	}

	const int rows = 2048; // matches Offset::row[2048]
	const int cols = 2048; // entries copied into each column table

	// Distance in ints between the starts of two column tables. It has to be
	// at least cols + 3 so that a table shifted by up to 3 ints cannot run
	// into the next one, and a multiple of 4 so the shift below alone decides
	// the 16-byte alignment. (A stride of exactly 2048 made the last entry of
	// tables 1 and 2 share storage with the first entry of the next table, so
	// the later memcpy overwrote it.)
	const int stride = cols + 4;

	o = (Offset*)_aligned_malloc(sizeof(Offset), 16);

	o->hash = hash;

	pixelAddress pa = m_psm[psm].pa;

	for(int y = 0; y < rows; y++)
	{
		o->row[y] = GSVector4i((int)pa(0, y, bp, bw));
	}

	// One buffer backs all four tables; col[0] is its base address (shift 0),
	// which is the pointer that must eventually be passed to _aligned_free.
	int* p = (int*)_aligned_malloc(sizeof(int) * stride * 4, 16);

	for(int i = 0; i < 4; i++)
	{
		// Shift table i by (4 - i) & 3 ints, so that &col[i][i] falls on a
		// 16-byte boundary.
		o->col[i] = &p[stride * i + ((4 - (i & 3)) & 3)];

		memcpy(o->col[i], m_psm[psm].rowOffset[0], sizeof(int) * cols);
	}

	m_omap.SetAt(hash, o);

	return o;
}
|
||||
|
||||
bool GSLocalMemory::FillRect(const GSVector4i& r, DWORD c, DWORD psm, DWORD bp, DWORD bw)
|
||||
|
||||
@@ -75,6 +75,13 @@ public:
|
||||
|
||||
GSClut m_clut;
|
||||
|
||||
// Cached address lookup tables for one (bp, bw, psm) buffer configuration.
// Instances are created and filled in by GSLocalMemory::GetOffset().
struct Offset
|
||||
{
|
||||
// row[y] is constructed from the format's pixel-address function evaluated at (0, y).
GSVector4i row[2048];
|
||||
// Four copies of the format's rowOffset[0] table. All four point into a single
// _aligned_malloc'd buffer, each at a different small shift; col[0] is the base
// of that buffer.
int* col[4];
|
||||
// Cache key: bp | (bw << 14) | (psm << 20).
DWORD hash;
|
||||
};
|
||||
|
||||
protected:
|
||||
static DWORD pageOffset32[32][32][64];
|
||||
static DWORD pageOffset32Z[32][32][64];
|
||||
@@ -108,10 +115,16 @@ protected:
|
||||
|
||||
friend class GSClut;
|
||||
|
||||
//
|
||||
|
||||
CRBMapC<DWORD, Offset*> m_omap;
|
||||
|
||||
public:
|
||||
GSLocalMemory();
|
||||
virtual ~GSLocalMemory();
|
||||
|
||||
Offset* GetOffset(DWORD bp, DWORD bw, DWORD psm, Offset* o = NULL);
|
||||
|
||||
// address
|
||||
|
||||
static DWORD PageNumber32(int x, int y, DWORD bp, DWORD bw)
|
||||
|
||||
@@ -26,7 +26,6 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
||||
: m_ds(ds)
|
||||
, m_id(id)
|
||||
, m_threads(threads)
|
||||
, m_pixels(0)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -35,42 +34,51 @@ GSRasterizer::~GSRasterizer()
|
||||
delete m_ds;
|
||||
}
|
||||
|
||||
int GSRasterizer::Draw(const GSRasterizerData* data)
|
||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
bool solid = m_ds->SetupDraw(data);
|
||||
bool solid = m_ds->BeginDraw(data);
|
||||
|
||||
const GSVector4i scissor = data->scissor;
|
||||
const GSVertexSW* vertices = data->vertices;
|
||||
const int count = data->count;
|
||||
|
||||
int prims = 0;
|
||||
m_stats.Reset();
|
||||
|
||||
__int64 start = __rdtsc();
|
||||
|
||||
switch(data->primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
prims = count;
|
||||
m_stats.prims = count;
|
||||
for(int i = 0; i < count; i++) DrawPoint(&vertices[i], scissor);
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
ASSERT(!(count & 1));
|
||||
prims = count / 2;
|
||||
m_stats.prims = count / 2;
|
||||
for(int i = 0; i < count; i += 2) DrawLine(&vertices[i], scissor);
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
ASSERT(!(count % 3));
|
||||
prims = count / 3;
|
||||
m_stats.prims = count / 3;
|
||||
for(int i = 0; i < count; i += 3) DrawTriangle(&vertices[i], scissor);
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
ASSERT(!(count & 1));
|
||||
prims = count / 2;
|
||||
m_stats.prims = count / 2;
|
||||
for(int i = 0; i < count; i += 2) DrawSprite(&vertices[i], scissor, solid);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
return prims;
|
||||
m_stats.ticks = __rdtsc() - start;
|
||||
|
||||
m_ds->EndDraw(m_stats);
|
||||
}
|
||||
|
||||
// Copies the statistics accumulated by this rasterizer (m_stats) into the
// caller-supplied structure. m_stats itself is left untouched.
void GSRasterizer::GetStats(GSRasterizerStats& stats)
|
||||
{
|
||||
stats = m_stats;
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
||||
@@ -85,7 +93,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
||||
{
|
||||
m_ds->DrawScanline(p.y, p.x, p.x + 1, *v);
|
||||
|
||||
m_pixels++;
|
||||
m_stats.pixels++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -224,7 +232,7 @@ void GSRasterizer::DrawTriangleTop(GSVertexSW* v, const GSVector4i& scissor)
|
||||
|
||||
if(top < scissor.y) top = scissor.y;
|
||||
if(bottom > scissor.w) bottom = scissor.w;
|
||||
|
||||
|
||||
if(top < bottom)
|
||||
{
|
||||
float py = (float)top - l.p.y;
|
||||
@@ -316,7 +324,7 @@ void GSRasterizer::DrawTriangleTopBottom(GSVertexSW* v, const GSVector4i& scisso
|
||||
GSVector4 dr;
|
||||
|
||||
bool b = (longest.p > GSVector4::zero()).mask() & 1;
|
||||
|
||||
|
||||
if(b)
|
||||
{
|
||||
dl = v01 / v01.p.yyyy();
|
||||
@@ -428,7 +436,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
||||
|
||||
if(pixels > 0)
|
||||
{
|
||||
m_pixels += pixels;
|
||||
m_stats.pixels += pixels;
|
||||
|
||||
GSVertexSW scan = l;
|
||||
|
||||
@@ -496,7 +504,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
||||
{
|
||||
m_ds->DrawSolidRect(r, scan);
|
||||
|
||||
m_pixels += (r.z - r.x) * (r.w - r.y);
|
||||
m_stats.pixels += (r.z - r.x) * (r.w - r.y);
|
||||
}
|
||||
|
||||
return;
|
||||
@@ -527,7 +535,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
||||
{
|
||||
(m_ds->*dsf)(top, left, right, scan);
|
||||
|
||||
m_pixels += right - left;
|
||||
m_stats.pixels += right - left;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -563,13 +571,11 @@ GSRasterizerMT::~GSRasterizerMT()
|
||||
}
|
||||
}
|
||||
|
||||
int GSRasterizerMT::Draw(const GSRasterizerData* data)
|
||||
void GSRasterizerMT::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
int prims = 0;
|
||||
|
||||
if(m_id == 0)
|
||||
{
|
||||
prims = __super::Draw(data);
|
||||
__super::Draw(data);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -577,8 +583,6 @@ int GSRasterizerMT::Draw(const GSRasterizerData* data)
|
||||
|
||||
InterlockedBitTestAndSet(m_sync, m_id);
|
||||
}
|
||||
|
||||
return prims;
|
||||
}
|
||||
|
||||
DWORD WINAPI GSRasterizerMT::StaticThreadProc(LPVOID lpParam)
|
||||
@@ -631,17 +635,19 @@ void GSRasterizerList::FreeRasterizers()
|
||||
}
|
||||
}
|
||||
|
||||
int GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||
void GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||
{
|
||||
*m_sync = 0;
|
||||
|
||||
int prims = 0;
|
||||
m_stats.Reset();
|
||||
|
||||
__int64 start = __rdtsc();
|
||||
|
||||
POSITION pos = GetTailPosition();
|
||||
|
||||
while(pos)
|
||||
{
|
||||
prims += GetPrev(pos)->Draw(data);
|
||||
GetPrev(pos)->Draw(data);
|
||||
}
|
||||
|
||||
while(*m_sync)
|
||||
@@ -649,19 +655,22 @@ int GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
return prims;
|
||||
}
|
||||
m_stats.ticks = __rdtsc() - start;
|
||||
|
||||
int GSRasterizerList::GetPixels()
|
||||
{
|
||||
int pixels = 0;
|
||||
|
||||
POSITION pos = GetHeadPosition();
|
||||
pos = GetHeadPosition();
|
||||
|
||||
while(pos)
|
||||
{
|
||||
pixels += GetNext(pos)->GetPixels();
|
||||
}
|
||||
GSRasterizerStats s;
|
||||
|
||||
return pixels;
|
||||
GetNext(pos)->GetStats(s);
|
||||
|
||||
m_stats.pixels += s.pixels;
|
||||
m_stats.prims = max(m_stats.prims, s.prims);
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the list-wide statistics (m_stats) into the caller-supplied structure.
// m_stats itself is left untouched.
void GSRasterizerList::GetStats(GSRasterizerStats& stats)
|
||||
{
|
||||
stats = m_stats;
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "GS.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
//
|
||||
#define FAST_DRAWSCANLINE
|
||||
|
||||
__declspec(align(16)) class GSRasterizerData
|
||||
@@ -36,11 +37,21 @@ public:
|
||||
const void* param;
|
||||
};
|
||||
|
||||
// Counters collected while rasterizing: elapsed ticks, primitives and pixels.
struct GSRasterizerStats
{
	__int64 ticks;
	int prims;
	int pixels;

	// A freshly constructed object starts with every counter at zero.
	GSRasterizerStats()
		: ticks(0)
		, prims(0)
		, pixels(0)
	{
	}

	// Puts every counter back to zero.
	void Reset()
	{
		ticks = 0;
		prims = 0;
		pixels = 0;
	}
};
|
||||
|
||||
class IRasterizer
|
||||
{
|
||||
public:
|
||||
virtual int Draw(const GSRasterizerData* data) = 0;
|
||||
virtual int GetPixels() = 0;
|
||||
virtual void Draw(const GSRasterizerData* data) = 0;
|
||||
virtual void GetStats(GSRasterizerStats& stats) = 0;
|
||||
};
|
||||
|
||||
class IDrawScanline
|
||||
@@ -50,11 +61,14 @@ public:
|
||||
|
||||
virtual ~IDrawScanline() {}
|
||||
|
||||
virtual bool SetupDraw(const GSRasterizerData* data) = 0;
|
||||
virtual bool BeginDraw(const GSRasterizerData* data) = 0;
|
||||
virtual void EndDraw(const GSRasterizerStats& stats) = 0;
|
||||
virtual void SetupPrim(GS_PRIM_CLASS primclass, const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
|
||||
virtual void DrawScanline(int top, int left, int right, const GSVertexSW& v) = 0;
|
||||
virtual void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v) = 0;
|
||||
virtual DrawScanlinePtr GetDrawScanlinePtr() = 0;
|
||||
|
||||
virtual void PrintStats() = 0;
|
||||
};
|
||||
|
||||
class GSRasterizer : public IRasterizer
|
||||
@@ -63,7 +77,7 @@ protected:
|
||||
IDrawScanline* m_ds;
|
||||
int m_id;
|
||||
int m_threads;
|
||||
int m_pixels;
|
||||
GSRasterizerStats m_stats;
|
||||
|
||||
void DrawPoint(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
void DrawLine(const GSVertexSW* v, const GSVector4i& scissor);
|
||||
@@ -82,8 +96,10 @@ public:
|
||||
|
||||
// IRasterizer
|
||||
|
||||
int Draw(const GSRasterizerData* data);
|
||||
int GetPixels() {int pixels = m_pixels; m_pixels = 0; return pixels;}
|
||||
void Draw(const GSRasterizerData* data);
|
||||
void GetStats(GSRasterizerStats& stats);
|
||||
|
||||
void PrintStats() {m_ds->PrintStats();}
|
||||
};
|
||||
|
||||
class GSRasterizerMT : public GSRasterizer
|
||||
@@ -104,12 +120,13 @@ public:
|
||||
|
||||
// IRasterizer
|
||||
|
||||
int Draw(const GSRasterizerData* data);
|
||||
void Draw(const GSRasterizerData* data);
|
||||
};
|
||||
|
||||
class GSRasterizerList : public CAtlList<GSRasterizerMT*>, public IRasterizer
|
||||
{
|
||||
long* m_sync;
|
||||
GSRasterizerStats m_stats;
|
||||
|
||||
void FreeRasterizers();
|
||||
|
||||
@@ -125,12 +142,14 @@ public:
|
||||
|
||||
for(int i = 0; i < threads; i++)
|
||||
{
|
||||
AddTail(new GSRasterizerMT(new DS(parent), i, threads, m_sync));
|
||||
AddTail(new GSRasterizerMT(new DS(parent, i), i, threads, m_sync));
|
||||
}
|
||||
}
|
||||
|
||||
// IRasterizer
|
||||
|
||||
int Draw(const GSRasterizerData* data);
|
||||
int GetPixels();
|
||||
void Draw(const GSRasterizerData* data);
|
||||
void GetStats(GSRasterizerStats& stats);
|
||||
|
||||
void PrintStats() {GetHead()->PrintStats();}
|
||||
};
|
||||
@@ -35,6 +35,8 @@ protected:
|
||||
GSTextureCacheSW* m_tc;
|
||||
Texture m_texture[2];
|
||||
bool m_reset;
|
||||
GSLocalMemory::Offset* m_fbo;
|
||||
GSLocalMemory::Offset* m_zbo;
|
||||
|
||||
__declspec(align(16)) struct VertexTrace
|
||||
{
|
||||
@@ -83,6 +85,8 @@ protected:
|
||||
|
||||
m_reset = false;
|
||||
}
|
||||
|
||||
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
|
||||
}
|
||||
|
||||
void ResetDevice()
|
||||
@@ -273,6 +277,12 @@ protected:
|
||||
|
||||
p.vm = m_mem.m_vm32;
|
||||
|
||||
m_fbo = m_mem.GetOffset(context->FRAME.Block(), context->FRAME.FBW, context->FRAME.PSM, m_fbo);
|
||||
m_zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM, m_zbo);
|
||||
|
||||
p.fbo = m_fbo;
|
||||
p.zbo = m_zbo;
|
||||
|
||||
p.sel.dw = 0;
|
||||
|
||||
p.sel.fpsm = 3;
|
||||
@@ -574,19 +584,19 @@ __int64 start = __rdtsc();
|
||||
data.count = m_count;
|
||||
data.param = &p;
|
||||
|
||||
int prims = m_rl.Draw(&data);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Prim, prims);
|
||||
m_rl.Draw(&data);
|
||||
|
||||
GSRasterizerStats stats;
|
||||
|
||||
m_rl.GetStats(stats);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Draw, 1);
|
||||
|
||||
int pixels = m_rl.GetPixels();
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, pixels);
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
|
||||
/*
|
||||
__int64 diff = __rdtsc() - start;
|
||||
s_total += diff;
|
||||
//if(pixels > 50000)
|
||||
if(pixels >= 50000)
|
||||
fprintf(s_fp, "[%I64d, %d, %d, %d] %08x, diff = %I64d /prim = %I64d /pixel = %I64d \n", frame, PRIM->PRIM, prims, pixels, p.sel, diff, diff / prims, pixels > 0 ? diff / pixels : 0);
|
||||
*/
|
||||
// TODO
|
||||
@@ -636,7 +646,7 @@ fprintf(s_fp, "[%I64d, %d, %d, %d] %08x, diff = %I64d /prim = %I64d /pixel = %I6
|
||||
if(s_dump)
|
||||
{
|
||||
CString str;
|
||||
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
|
||||
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
|
||||
if(s_save) {m_mem.SaveBMP(str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameSize(1).cx, 512);}//GetFrameSize(1).cy);
|
||||
str.Format(_T("c:\\temp1\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
|
||||
if(s_savez) {m_mem.SaveBMP(str, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameSize(1).cx, 512);}
|
||||
@@ -651,6 +661,8 @@ fprintf(s_fp, "[%I64d, %d, %d, %d] %08x, diff = %I64d /prim = %I64d /pixel = %I6
|
||||
public:
|
||||
GSRendererSW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, int threads)
|
||||
: GSRendererT(base, mt, irq, nloophack, rs)
|
||||
, m_fbo(NULL)
|
||||
, m_zbo(NULL)
|
||||
{
|
||||
m_rl.Create<GSDrawScanline>(this, threads);
|
||||
|
||||
|
||||
@@ -416,12 +416,7 @@ void GSState::GIFPackedRegHandlerSTQ(GIFPackedReg* r)
|
||||
|
||||
void GSState::GIFPackedRegHandlerUV(GIFPackedReg* r)
|
||||
{
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(r);
|
||||
m_v.UV.ai32[0] = (UINT32)GSVector4i::store(v.pu32(v)) & 0x3fff3fff;
|
||||
|
||||
#elif _M_SSE >= 0x200
|
||||
#if _M_SSE >= 0x200
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
|
||||
m_v.UV.ai32[0] = (UINT32)GSVector4i::store(v.ps32(v));
|
||||
|
||||
@@ -22,21 +22,21 @@
|
||||
#include "StdAfx.h"
|
||||
#include "GSTables.h"
|
||||
|
||||
const DWORD blockTable32[4][8] = {
|
||||
const BYTE blockTable32[4][8] = {
|
||||
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
||||
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
||||
{ 8, 9, 12, 13, 24, 25, 28, 29},
|
||||
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
||||
};
|
||||
|
||||
const DWORD blockTable32Z[4][8] = {
|
||||
const BYTE blockTable32Z[4][8] = {
|
||||
{ 24, 25, 28, 29, 8, 9, 12, 13},
|
||||
{ 26, 27, 30, 31, 10, 11, 14, 15},
|
||||
{ 16, 17, 20, 21, 0, 1, 4, 5},
|
||||
{ 18, 19, 22, 23, 2, 3, 6, 7}
|
||||
};
|
||||
|
||||
const DWORD blockTable16[8][4] = {
|
||||
const BYTE blockTable16[8][4] = {
|
||||
{ 0, 2, 8, 10 },
|
||||
{ 1, 3, 9, 11 },
|
||||
{ 4, 6, 12, 14 },
|
||||
@@ -47,7 +47,7 @@ const DWORD blockTable16[8][4] = {
|
||||
{ 21, 23, 29, 31 }
|
||||
};
|
||||
|
||||
const DWORD blockTable16S[8][4] = {
|
||||
const BYTE blockTable16S[8][4] = {
|
||||
{ 0, 2, 16, 18 },
|
||||
{ 1, 3, 17, 19 },
|
||||
{ 8, 10, 24, 26 },
|
||||
@@ -58,7 +58,7 @@ const DWORD blockTable16S[8][4] = {
|
||||
{ 13, 15, 29, 31 }
|
||||
};
|
||||
|
||||
const DWORD blockTable16Z[8][4] = {
|
||||
const BYTE blockTable16Z[8][4] = {
|
||||
{ 24, 26, 16, 18 },
|
||||
{ 25, 27, 17, 19 },
|
||||
{ 28, 30, 20, 22 },
|
||||
@@ -69,7 +69,7 @@ const DWORD blockTable16Z[8][4] = {
|
||||
{ 13, 15, 5, 7 }
|
||||
};
|
||||
|
||||
const DWORD blockTable16SZ[8][4] = {
|
||||
const BYTE blockTable16SZ[8][4] = {
|
||||
{ 24, 26, 8, 10 },
|
||||
{ 25, 27, 9, 11 },
|
||||
{ 16, 18, 0, 2 },
|
||||
@@ -80,14 +80,14 @@ const DWORD blockTable16SZ[8][4] = {
|
||||
{ 21, 23, 5, 7 }
|
||||
};
|
||||
|
||||
const DWORD blockTable8[4][8] = {
|
||||
const BYTE blockTable8[4][8] = {
|
||||
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
||||
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
||||
{ 8, 9, 12, 13, 24, 25, 28, 29},
|
||||
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
||||
};
|
||||
|
||||
const DWORD blockTable4[8][4] = {
|
||||
const BYTE blockTable4[8][4] = {
|
||||
{ 0, 2, 8, 10 },
|
||||
{ 1, 3, 9, 11 },
|
||||
{ 4, 6, 12, 14 },
|
||||
@@ -98,7 +98,7 @@ const DWORD blockTable4[8][4] = {
|
||||
{ 21, 23, 29, 31 }
|
||||
};
|
||||
|
||||
const DWORD columnTable32[8][8] = {
|
||||
const BYTE columnTable32[8][8] = {
|
||||
{ 0, 1, 4, 5, 8, 9, 12, 13 },
|
||||
{ 2, 3, 6, 7, 10, 11, 14, 15 },
|
||||
{ 16, 17, 20, 21, 24, 25, 28, 29 },
|
||||
@@ -109,7 +109,7 @@ const DWORD columnTable32[8][8] = {
|
||||
{ 50, 51, 54, 55, 58, 59, 62, 63 },
|
||||
};
|
||||
|
||||
const DWORD columnTable16[8][16] = {
|
||||
const BYTE columnTable16[8][16] = {
|
||||
{ 0, 2, 8, 10, 16, 18, 24, 26,
|
||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||
{ 4, 6, 12, 14, 20, 22, 28, 30,
|
||||
@@ -128,7 +128,7 @@ const DWORD columnTable16[8][16] = {
|
||||
101, 103, 109, 111, 117, 119, 125, 127 },
|
||||
};
|
||||
|
||||
const DWORD columnTable8[16][16] = {
|
||||
const BYTE columnTable8[16][16] = {
|
||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||
{ 8, 12, 24, 28, 40, 44, 56, 60,
|
||||
@@ -163,7 +163,7 @@ const DWORD columnTable8[16][16] = {
|
||||
203, 207, 219, 223, 235, 239, 251, 255 },
|
||||
};
|
||||
|
||||
const DWORD columnTable4[16][32] = {
|
||||
const WORD columnTable4[16][32] = {
|
||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||
2, 10, 34, 42, 66, 74, 98, 106,
|
||||
4, 12, 36, 44, 68, 76, 100, 108,
|
||||
@@ -230,7 +230,7 @@ const DWORD columnTable4[16][32] = {
|
||||
407, 415, 439, 447, 471, 479, 503, 511 },
|
||||
};
|
||||
|
||||
const DWORD clutTableT32I8[128] =
|
||||
const BYTE clutTableT32I8[128] =
|
||||
{
|
||||
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
|
||||
64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79,
|
||||
@@ -242,13 +242,13 @@ const DWORD clutTableT32I8[128] =
|
||||
112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127
|
||||
};
|
||||
|
||||
const DWORD clutTableT32I4[16] =
|
||||
const BYTE clutTableT32I4[16] =
|
||||
{
|
||||
0, 1, 4, 5, 8, 9, 12, 13,
|
||||
2, 3, 6, 7, 10, 11, 14, 15
|
||||
};
|
||||
|
||||
const DWORD clutTableT16I8[32] =
|
||||
const BYTE clutTableT16I8[32] =
|
||||
{
|
||||
0, 2, 8, 10, 16, 18, 24, 26,
|
||||
4, 6, 12, 14, 20, 22, 28, 30,
|
||||
@@ -256,7 +256,7 @@ const DWORD clutTableT16I8[32] =
|
||||
5, 7, 13, 15, 21, 23, 29, 31
|
||||
};
|
||||
|
||||
const DWORD clutTableT16I4[16] =
|
||||
const BYTE clutTableT16I4[16] =
|
||||
{
|
||||
0, 2, 8, 10, 16, 18, 24, 26,
|
||||
4, 6, 12, 14, 20, 22, 28, 30
|
||||
|
||||
@@ -21,19 +21,19 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
extern const DWORD blockTable32[4][8];
|
||||
extern const DWORD blockTable32Z[4][8];
|
||||
extern const DWORD blockTable16[8][4];
|
||||
extern const DWORD blockTable16S[8][4];
|
||||
extern const DWORD blockTable16Z[8][4];
|
||||
extern const DWORD blockTable16SZ[8][4];
|
||||
extern const DWORD blockTable8[4][8];
|
||||
extern const DWORD blockTable4[8][4];
|
||||
extern const DWORD columnTable32[8][8];
|
||||
extern const DWORD columnTable16[8][16];
|
||||
extern const DWORD columnTable8[16][16];
|
||||
extern const DWORD columnTable4[16][32];
|
||||
extern const DWORD clutTableT32I8[128];
|
||||
extern const DWORD clutTableT32I4[16];
|
||||
extern const DWORD clutTableT16I8[32];
|
||||
extern const DWORD clutTableT16I4[16];
|
||||
extern const BYTE blockTable32[4][8];
|
||||
extern const BYTE blockTable32Z[4][8];
|
||||
extern const BYTE blockTable16[8][4];
|
||||
extern const BYTE blockTable16S[8][4];
|
||||
extern const BYTE blockTable16Z[8][4];
|
||||
extern const BYTE blockTable16SZ[8][4];
|
||||
extern const BYTE blockTable8[4][8];
|
||||
extern const BYTE blockTable4[8][4];
|
||||
extern const BYTE columnTable32[8][8];
|
||||
extern const BYTE columnTable16[8][16];
|
||||
extern const BYTE columnTable8[16][16];
|
||||
extern const WORD columnTable4[16][32];
|
||||
extern const BYTE clutTableT32I8[128];
|
||||
extern const BYTE clutTableT32I4[16];
|
||||
extern const BYTE clutTableT16I8[32];
|
||||
extern const BYTE clutTableT16I4[16];
|
||||
|
||||
Reference in New Issue
Block a user