gsdx-ogl: micro optimize PSConstantBuffer cache

Might help to save a cache line on the CPU :)
This commit is contained in:
Gregory Hainaut 2014-10-07 19:11:43 +02:00
parent 58bd645d49
commit 84f844767c
4 changed files with 22 additions and 21 deletions

View File

@ -294,12 +294,12 @@ class GSDeviceOGL : public GSDevice
__aligned(struct, 32) PSConstantBuffer __aligned(struct, 32) PSConstantBuffer
{ {
GSVector4 FogColor_AREF; GSVector4 FogColor_AREF;
GSVector4 HalfTexel;
GSVector4 WH; GSVector4 WH;
GSVector4 MinMax;
GSVector4 MinF_TA; GSVector4 MinF_TA;
GSVector4i MskFix; GSVector4i MskFix;
GSVector4 HalfTexel;
GSVector4 MinMax;
GSVector4 TC_OffsetHack; GSVector4 TC_OffsetHack;
PSConstantBuffer() PSConstantBuffer()
@ -319,12 +319,13 @@ class GSDeviceOGL : public GSDevice
// if WH matches both HalfTexel and TC_OffsetHack do too // if WH matches both HalfTexel and TC_OffsetHack do too
// MinMax depends on WH and MskFix so no need to check it too // MinMax depends on WH and MskFix so no need to check it too
if(!((a[0] == b[0]) & (a[2] == b[2]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue()) if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue())
{ {
// Note previous check uses SSE already, a plain copy will be faster than any memcpy
a[0] = b[0]; a[0] = b[0];
a[1] = b[1];
a[2] = b[2]; a[2] = b[2];
a[4] = b[4]; a[3] = b[3];
a[5] = b[5];
return true; return true;
} }

View File

@ -26,7 +26,7 @@ GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r) : m_renderer(r)
{ {
m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0;
UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
UserHacks_NVIDIAHack = !!theApp.GetConfig("UserHacks_NVIDIAHack", 0) && !!theApp.GetConfig("UserHacks", 0); UserHacks_NVIDIAHack = !!theApp.GetConfig("UserHacks_NVIDIAHack", 0) && !!theApp.GetConfig("UserHacks", 0);
m_paltex = !!theApp.GetConfig("paltex", 0); m_paltex = !!theApp.GetConfig("paltex", 0);
@ -105,10 +105,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// Arc the Lad finds the wrong surface here when looking for a depth stencil. // Arc the Lad finds the wrong surface here when looking for a depth stencil.
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
// (Simply not doing this code at all makes a lot of previously missing stuff show (but breaks pretty much everything // (Simply not doing this code at all makes a lot of previously missing stuff show (but breaks pretty much everything
// else.) // else.)
//for(int type = 0; type < 2 && dst == NULL; type++) //for(int type = 0; type < 2 && dst == NULL; type++)
for(int type = 0; type < 1 && dst == NULL; type++) // Only look for render target, no depth stencil for(int type = 0; type < 1 && dst == NULL; type++) // Only look for render target, no depth stencil
{ {
@ -320,7 +320,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
GSVector4i r; GSVector4i r;
uint32* pages = (uint32*)m_temp; uint32* pages = (uint32*)m_temp;
o->GetPages(rect, pages, &r); o->GetPages(rect, pages, &r);
bool found = false; bool found = false;
@ -349,7 +349,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
if(s->m_repeating) if(s->m_repeating)
{ {
vector<GSVector2i>& l = s->m_p2t[page]; vector<GSVector2i>& l = s->m_p2t[page];
for(vector<GSVector2i>::iterator k = l.begin(); k != l.end(); k++) for(vector<GSVector2i>::iterator k = l.begin(); k != l.end(); k++)
{ {
valid[k->x] &= k->y; valid[k->x] &= k->y;
@ -468,7 +468,7 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r)
} }
} }
} }
//GSTextureCache::Target* rt2 = NULL; //GSTextureCache::Target* rt2 = NULL;
//int ymin = INT_MAX; //int ymin = INT_MAX;
//for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) //for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
@ -535,7 +535,7 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r)
// Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin)); // Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin));
//} //}
// TODO: ds // TODO: ds
} }
@ -594,13 +594,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
bool hack = false; bool hack = false;
if(m_spritehack && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)) if(m_spritehack && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H))
{ {
src->m_spritehack_t = true; src->m_spritehack_t = true;
if(m_spritehack == 2 && TEX0.CPSM != PSM_PSMCT16) if(m_spritehack == 2 && TEX0.CPSM != PSM_PSMCT16)
src->m_spritehack_t = false; src->m_spritehack_t = false;
} }
else else
src->m_spritehack_t = false; src->m_spritehack_t = false;

View File

@ -787,12 +787,12 @@ static const char* tfx_glsl =
"{\n" "{\n"
" vec3 FogColor;\n" " vec3 FogColor;\n"
" float AREF;\n" " float AREF;\n"
" vec4 HalfTexel;\n"
" vec4 WH;\n" " vec4 WH;\n"
" vec4 MinMax;\n"
" vec2 MinF;\n" " vec2 MinF;\n"
" vec2 TA;\n" " vec2 TA;\n"
" uvec4 MskFix;\n" " uvec4 MskFix;\n"
" vec4 HalfTexel;\n"
" vec4 MinMax;\n"
" vec4 TC_OffsetHack;\n" " vec4 TC_OffsetHack;\n"
"};\n" "};\n"
"\n" "\n"

View File

@ -336,12 +336,12 @@ layout(std140, binding = 21) uniform cb21
{ {
vec3 FogColor; vec3 FogColor;
float AREF; float AREF;
vec4 HalfTexel;
vec4 WH; vec4 WH;
vec4 MinMax;
vec2 MinF; vec2 MinF;
vec2 TA; vec2 TA;
uvec4 MskFix; uvec4 MskFix;
vec4 HalfTexel;
vec4 MinMax;
vec4 TC_OffsetHack; vec4 TC_OffsetHack;
}; };