From 84f844767c2ec46eacab261cc7c0aa26540d7a34 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 7 Oct 2014 19:11:43 +0200 Subject: [PATCH] gsdx-ogl: micro optimize PSConstantBuffer cache Might help to save a cache line on the CPU :) --- plugins/GSdx/GSDeviceOGL.h | 11 ++++++----- plugins/GSdx/GSTextureCache.cpp | 24 ++++++++++++------------ plugins/GSdx/res/glsl_source.h | 4 ++-- plugins/GSdx/res/tfx.glsl | 4 ++-- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 68acabc5e..20fcdbb58 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -294,12 +294,12 @@ class GSDeviceOGL : public GSDevice __aligned(struct, 32) PSConstantBuffer { GSVector4 FogColor_AREF; - GSVector4 HalfTexel; GSVector4 WH; - GSVector4 MinMax; GSVector4 MinF_TA; GSVector4i MskFix; + GSVector4 HalfTexel; + GSVector4 MinMax; GSVector4 TC_OffsetHack; PSConstantBuffer() @@ -319,12 +319,13 @@ class GSDeviceOGL : public GSDevice // if WH matches both HalfTexel and TC_OffsetHack do too // MinMax depends on WH and MskFix so no need to check it too - if(!((a[0] == b[0]) & (a[2] == b[2]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue()) + if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue()) { + // Note previous check uses SSE already, a plain copy will be faster than any memcpy a[0] = b[0]; + a[1] = b[1]; a[2] = b[2]; - a[4] = b[4]; - a[5] = b[5]; + a[3] = b[3]; return true; } diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index b4607af7b..635c12e70 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -26,7 +26,7 @@ GSTextureCache::GSTextureCache(GSRenderer* r) : m_renderer(r) { m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; - + UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); UserHacks_NVIDIAHack = !!theApp.GetConfig("UserHacks_NVIDIAHack", 0) && !!theApp.GetConfig("UserHacks", 0); m_paltex = !!theApp.GetConfig("paltex", 0); @@ -105,10 +105,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. - + // (Simply not doing this code at all makes a lot of previsouly missing stuff show (but breaks pretty much everything // else.) - + //for(int type = 0; type < 2 && dst == NULL; type++) for(int type = 0; type < 1 && dst == NULL; type++) // Only look for render target, no depth stencil { @@ -320,7 +320,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo GSVector4i r; uint32* pages = (uint32*)m_temp; - + o->GetPages(rect, pages, &r); bool found = false; @@ -349,7 +349,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo if(s->m_repeating) { vector& l = s->m_p2t[page]; - + for(vector::iterator k = l.begin(); k != l.end(); k++) { valid[k->x] &= k->y; @@ -468,7 +468,7 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r) } } } - + //GSTextureCache::Target* rt2 = NULL; //int ymin = INT_MAX; //for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) @@ -535,7 +535,7 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* o, const GSVector4i& r) // Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin)); //} - + // TODO: ds } @@ -594,13 +594,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con bool hack = false; - if(m_spritehack && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)) + if(m_spritehack && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)) { src->m_spritehack_t = true; - - if(m_spritehack == 2 && TEX0.CPSM != PSM_PSMCT16) - src->m_spritehack_t = false; - } + + if(m_spritehack == 2 && TEX0.CPSM != PSM_PSMCT16) + src->m_spritehack_t = false; + } else src->m_spritehack_t = false; diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 907a88468..f5b682de1 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -787,12 +787,12 @@ static const char* tfx_glsl = "{\n" " vec3 FogColor;\n" " float AREF;\n" - " vec4 HalfTexel;\n" " vec4 WH;\n" - " vec4 MinMax;\n" " vec2 MinF;\n" " vec2 TA;\n" " uvec4 MskFix;\n" + " vec4 HalfTexel;\n" + " vec4 MinMax;\n" " vec4 TC_OffsetHack;\n" "};\n" "\n" diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl index acf436b15..c4f3536f4 100644 --- a/plugins/GSdx/res/tfx.glsl +++ b/plugins/GSdx/res/tfx.glsl @@ -336,12 +336,12 @@ layout(std140, binding = 21) uniform cb21 { vec3 FogColor; float AREF; - vec4 HalfTexel; vec4 WH; - vec4 MinMax; vec2 MinF; vec2 TA; uvec4 MskFix; + vec4 HalfTexel; + vec4 MinMax; vec4 TC_OffsetHack; };