diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 41fb20b118..85fe07170e 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -168,6 +168,7 @@ cbuffer cb1 float4 LODParams; float4 STRange; int4 ChannelShuffle; + float2 ChannelShuffleOffset; float2 TC_OffsetHack; float2 STScale; float4x4 DitherMatrix; @@ -757,17 +758,17 @@ float4 ps_color(PS_INPUT input) #endif #if PS_CHANNEL_FETCH == 1 - float4 T = fetch_red(int2(input.p.xy)); + float4 T = fetch_red(int2(input.p.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 2 - float4 T = fetch_green(int2(input.p.xy)); + float4 T = fetch_green(int2(input.p.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 3 - float4 T = fetch_blue(int2(input.p.xy)); + float4 T = fetch_blue(int2(input.p.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 4 - float4 T = fetch_alpha(int2(input.p.xy)); + float4 T = fetch_alpha(int2(input.p.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 5 - float4 T = fetch_rgb(int2(input.p.xy)); + float4 T = fetch_rgb(int2(input.p.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 6 - float4 T = fetch_gXbY(int2(input.p.xy)); + float4 T = fetch_gXbY(int2(input.p.xy + ChannelShuffleOffset)); #elif PS_DEPTH_FMT > 0 float4 T = sample_depth(st_int, input.p.xy); #else diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index fc642a9676..2955c11f0f 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -49,6 +49,7 @@ layout(std140, binding = 0) uniform cb21 vec4 STRange; ivec4 ChannelShuffle; + vec2 ChannelShuffleOffset; vec2 TC_OffsetHack; vec2 STScale; @@ -315,7 +316,7 @@ int fetch_raw_depth() #if PS_TEX_IS_FB == 1 return int(sample_from_rt().r * multiplier); #else - return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier); + return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy + 
ChannelShuffleOffset), 0).r * multiplier); #endif } @@ -324,7 +325,7 @@ vec4 fetch_raw_color() #if PS_TEX_IS_FB == 1 return sample_from_rt(); #else - return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0); + return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy + ChannelShuffleOffset), 0); #endif } diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 8e7bd53f4d..69237e11f0 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -316,6 +316,7 @@ layout(std140, set = 0, binding = 1) uniform cb1 vec4 LODParams; vec4 STRange; ivec4 ChannelShuffle; + vec2 ChannelShuffleOffset; vec2 TC_OffsetHack; vec2 STScale; mat4 DitherMatrix; @@ -926,17 +927,17 @@ vec4 ps_color() #if !NEEDS_TEX vec4 T = vec4(0.0f); #elif PS_CHANNEL_FETCH == 1 - vec4 T = fetch_red(ivec2(gl_FragCoord.xy)); + vec4 T = fetch_red(ivec2(gl_FragCoord.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 2 - vec4 T = fetch_green(ivec2(gl_FragCoord.xy)); + vec4 T = fetch_green(ivec2(gl_FragCoord.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 3 - vec4 T = fetch_blue(ivec2(gl_FragCoord.xy)); + vec4 T = fetch_blue(ivec2(gl_FragCoord.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 4 - vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy)); + vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 5 - vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy)); + vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy + ChannelShuffleOffset)); #elif PS_CHANNEL_FETCH == 6 - vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy)); + vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy + ChannelShuffleOffset)); #elif PS_DEPTH_FMT > 0 vec4 T = sample_depth(st_int, ivec2(gl_FragCoord.xy)); #else diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 70c22368d7..ffa74b8c61 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1202,7 +1202,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) // Urban Chaos writes to the 
memory backing the CLUT in the middle of a shuffle, and // it's unclear whether the CLUT would actually get reloaded in that case. if (TEX0.CBP != m_mem.m_clut.GetCLUTCBP()) - m_channel_shuffle_abort = true; + m_channel_shuffle_finish = true; } TEX0.CPSM &= 0xa; // 1010b diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index bc05348852..f26db3bfa3 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -258,7 +258,7 @@ public: bool m_using_temp_z = false; bool m_temp_z_full_copy = false; bool m_in_target_draw = false; - bool m_channel_shuffle_abort = false; + bool m_channel_shuffle_finish = false; u32 m_target_offset = 0; u8 m_scanmask_used = 0; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index c3028903ea..6ca3e6aff3 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -622,6 +622,7 @@ struct alignas(16) GSHWDrawConfig GSVector4 LODParams; GSVector4 STRange; GSVector4i ChannelShuffle; + GSVector2 ChannelShuffleOffset; GSVector2 TCOffsetHack; GSVector2 STScale; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index c9a3329dc6..ca1b0f192b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2345,9 +2345,18 @@ void GSRendererHW::Draw() // Fortunately, it seems to change the FBMSK along the way, so this check alone is sufficient. // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. 
- m_channel_shuffle = !m_channel_shuffle_abort && IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && - m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block() && - m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; + const bool is_hle_skip = m_conf.ps.urban_chaos_hle || m_conf.ps.tales_of_abyss_hle; + const u32 max_skip = ((m_channel_shuffle_finish || !m_channel_shuffle_width) ? 1 : m_channel_shuffle_width) << 5; + const bool shuffle_detect = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && (m_last_channel_shuffle_fbp + max_skip) >= m_context->FRAME.Block() && + m_last_channel_shuffle_end_block > m_context->FRAME.Block() && m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0 + && (m_last_channel_shuffle_tbp + max_skip) >= m_context->TEX0.TBP0; + + const bool shuffle_detect_loose = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && + m_last_channel_shuffle_end_block > m_context->FRAME.Block() && m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0; + + m_channel_shuffle = !m_channel_shuffle_finish && ((!is_hle_skip && shuffle_detect) || (is_hle_skip && shuffle_detect_loose)); if (m_channel_shuffle) { @@ -2415,12 +2424,21 @@ void GSRendererHW::Draw() CleanupDraw(false); } } + + if (!shuffle_detect) + { + m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_tbp = 0xffff; + m_last_channel_shuffle_end_block = 0xffff; + } #ifdef ENABLE_OGL_DEBUG if (num_skipped_channel_shuffle_draws > 0) GL_CACHE("HW: Skipped %d channel shuffle draws ending at %d", num_skipped_channel_shuffle_draws, s_n); #endif num_skipped_channel_shuffle_draws = 0; - + } + else + { m_last_channel_shuffle_fbp = 0xffff; m_last_channel_shuffle_tbp = 0xffff; m_last_channel_shuffle_end_block = 0xffff; @@ 
-2429,7 +2447,7 @@ void GSRendererHW::Draw() m_last_rt = nullptr; m_channel_shuffle_width = 0; m_full_screen_shuffle = false; - m_channel_shuffle_abort = false; + m_channel_shuffle_finish = false; m_channel_shuffle_src_valid = GSVector4i::zero(); GL_PUSH("HW: Draw %d (Context %u)", s_n, PRIM->CTXT); @@ -3905,7 +3923,6 @@ void GSRendererHW::Draw() if (m_channel_shuffle) { - m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. @@ -4042,7 +4059,6 @@ void GSRendererHW::Draw() m_last_channel_shuffle_fbmsk = m_context->FRAME.FBMSK; if (rt) { - m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; m_last_channel_shuffle_tbp = src->m_TEX0.TBP0; // Urban Chaos goes from Z16 to C32, so let's just use the rt's original end block. if (!src->m_from_target || GSLocalMemory::m_psm[src->m_from_target_TEX0.PSM].bpp != GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) @@ -4741,6 +4757,10 @@ void GSRendererHW::Draw() // Limit to 2x the vertical height of the resolution (for double buffering) rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); + if (m_channel_shuffle) + { + m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + } } if (ds) @@ -5547,7 +5567,6 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t s[1].V = 16384; m_r = GSVector4i(0, 0, 1024, 1024); - // We need to count the pages that get shuffled to, some games (like Hitman Blood Money dialogue blur effects) only do half the screen. 
if (!m_full_screen_shuffle && !m_conf.ps.urban_chaos_hle && !m_conf.ps.tales_of_abyss_hle && src) { @@ -5557,21 +5576,14 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t m_channel_shuffle_width = src->m_TEX0.TBW; } + + m_channel_shuffle_finish = false; } else { const u32 frame_page_offset = std::max(static_cast<int>(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * rt->m_TEX0.TBW)), 0); m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); - // Hitman suffers from this, not sure on the exact scenario at the moment, but we need the barrier. - if (NeedsBlending() && m_context->ALPHA.IsCdInBlend()) - { - // Needed to enable IsFeedbackLoop. - m_conf.ps.channel_fb = 1; - // Assume no overlap when it's a channel shuffle, no need for full barriers. - m_conf.require_one_barrier = true; - } - // This is for offsetting the texture, however if the texture has a region clamp, we don't want to move it. // A good two test games for this is Ghost in the Shell (no region clamp) and Tekken 5 (offset clamp on shadows) if (rt && rt->m_TEX0.TBP0 == m_cached_ctx.FRAME.Block()) @@ -5595,7 +5607,6 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t s[1].U = m_r.z << 4; s[0].V = m_r.y << 4; s[1].V = m_r.w << 4; - m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; // If we're doing per page copying, then set the valid 1 frame ahead if we're continuing, as this will save the target lookup making a new target for the new row. const u32 frame_offset = m_cached_ctx.FRAME.Block() + (IsPageCopy() ?
0x20 : 0); @@ -5614,13 +5625,15 @@ __ri u32 GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool t new_valid.w = std::max(new_valid.w, offset_height); rt->UpdateValidity(new_valid, true); + + m_channel_shuffle_finish = true; } m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; m_primitive_covers_without_gaps = NoGapsType::FullCover; - m_channel_shuffle_abort = false; + m_conf.cb_ps.ChannelShuffleOffset = GSVector2(0, 0); return true; } @@ -6426,7 +6439,10 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, { // don't overwrite the texture when using channel shuffle, but keep the palette if (!m_channel_shuffle) + { + m_conf.cb_ps.ChannelShuffleOffset = GSVector2(0, 0); m_conf.tex = tex->m_texture; + } m_conf.pal = tex->m_palette; // Hazard handling (i.e. reading from the current RT/DS). @@ -6818,7 +6834,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const GSTextureCache::Target* src_target = nullptr; if (!m_downscale_source || !tex->m_from_target) { - if (rt && m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) + if (rt && m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) { // Can we read the framebuffer directly? (i.e. sample location matches up). if (CanUseTexIsFB(rt, tex, tmm)) @@ -6901,37 +6917,31 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GSVector4i::storel(&copy_dst_offset, copy_range); if (m_channel_shuffle && (tex_diff || frame_diff)) { + const u32 max_skip = ((m_channel_shuffle_finish || !m_channel_shuffle_width) ?
1 : m_channel_shuffle_width) << 5; + const bool new_shuffle = !(m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && (m_last_channel_shuffle_fbp + max_skip) >= m_context->FRAME.Block() && + m_last_channel_shuffle_end_block > m_context->FRAME.Block() && m_last_channel_shuffle_tbp <= m_context->TEX0.TBP0 && (m_last_channel_shuffle_tbp + max_skip) >= m_context->TEX0.TBP0); - const u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; - const u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; - const u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; - - copy_range.x += horizontal_offset; - copy_range.y += vertical_offset; - copy_range.z += horizontal_offset; - copy_range.w += vertical_offset; - - if (!m_channel_shuffle) + if (rt == tex->m_from_target && new_shuffle) { - copy_size.y -= vertical_offset; - copy_size.x -= horizontal_offset; + if (m_prim_overlap == PRIM_OVERLAP_NO || !(g_gs_device->Features().texture_barrier || g_gs_device->Features().multidraw_fb_copy)) + m_conf.require_one_barrier = true; + else + m_conf.require_full_barrier = true; } + + const int page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; + const int horizontal_offset = ((page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x); + const int vertical_offset = ((page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y); + + m_conf.cb_ps.ChannelShuffleOffset = GSVector2((horizontal_offset - m_r.x) * tex->GetScale(), (vertical_offset - m_r.y) * tex->GetScale()); + m_conf.ps.channel_fb = 1; target_region = false; source_region.bits = 0; - //copied_rt = tex->m_from_target != nullptr; - if (m_in_target_draw && (page_offset || frame_diff)) - { - copy_range.z = copy_range.x + m_r.width(); - copy_range.w 
= copy_range.y + m_r.height(); - if (tex_diff != frame_diff) - { - GSVector4i::storel(&copy_dst_offset, m_r); - } - } - - copy_range.z = std::min(copy_range.z, src_target->m_unscaled_size.x); - copy_range.w = std::min(copy_range.w, src_target->m_unscaled_size.y); + unscaled_size = src_target->GetUnscaledSize(); + scale = src_target->GetScale(); + return; } } else diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 497389f8a4..42d339049e 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -2081,6 +2081,7 @@ static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, HalfTexel) == of static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MinMax) == offsetof(GSMTLMainPSUniform, uv_min_max)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STRange) == offsetof(GSMTLMainPSUniform, st_range)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffle) == offsetof(GSMTLMainPSUniform, channel_shuffle)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffleOffset) == offsetof(GSMTLMainPSUniform, channel_shuffle_offset)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TCOffsetHack) == offsetof(GSMTLMainPSUniform, tc_offset)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STScale) == offsetof(GSMTLMainPSUniform, st_scale)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, DitherMatrix) == offsetof(GSMTLMainPSUniform, dither_matrix)); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index 1ba265d5f9..de98dae597 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -129,6 +129,7 @@ struct GSMTLMainPSUniform unsigned int green_mask; unsigned int green_shift; } channel_shuffle; + vector_float2 channel_shuffle_offset; vector_float2 tc_offset; vector_float2 st_scale; matrix_float4x4 dither_matrix; diff --git
a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 942a45e16c..3779e3dfea 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -509,7 +509,7 @@ struct PSMain uint fetch_raw_depth() { - return tex_depth.read(ushort2(in.p.xy)) * 0x1p32f; + return tex_depth.read(ushort2(in.p.xy + cb.channel_shuffle_offset)) * 0x1p32f; } float4 fetch_raw_color() @@ -517,7 +517,7 @@ struct PSMain if (PS_TEX_IS_FB) return current_color; else - return tex.read(ushort2(in.p.xy)); + return tex.read(ushort2(in.p.xy + cb.channel_shuffle_offset)); } float4 fetch_c(ushort2 uv) diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 7851b71120..1dc2139a36 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 79; +static constexpr u32 SHADER_CACHE_VERSION = 80;