Compare commits

...

5 Commits

Author SHA1 Message Date
refractionpcsx2
f3b4c50909 GS/HW: Improve Native Scaling detection + Include direct mem reads 2025-06-20 02:02:30 +02:00
lightningterror
a45f27e6e9 GS/DX: Don't output color for datm/stencil date shaders.
The RTV (render target view) is not bound, so there is no need
to output anything; just clip/discard if needed.

Fixes the DX11 API warning that the pixel shader writes color
output while no RTV is bound.
2025-06-20 02:01:47 +02:00
lightningterror
2f84bf0cca GS/TC: Don't enable Frame buffer conversion on PSMT8.
We have a dedicated shader now, so there is no need.
2025-06-20 02:01:28 +02:00
refractionpcsx2
3e2c3e5075 GS/TC: Slight fix for rect translation with block offset 2025-06-20 02:00:51 +02:00
refractionpcsx2
154259d0a6 GS/HW: Don't resize target if only writing to alpha when RGB is valid 2025-06-20 02:00:51 +02:00
5 changed files with 62 additions and 48 deletions

View File

@@ -110,48 +110,24 @@ uint ps_convert_rgba8_16bits(PS_INPUT input) : SV_Target0
return ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | ((i.w & 0x80u) << 8);
}
PS_OUTPUT ps_datm1(PS_INPUT input)
void ps_datm1(PS_INPUT input)
{
PS_OUTPUT output;
clip(sample_c(input.t).a - 127.5f / 255); // >= 0x80 pass
output.c = 0;
return output;
}
PS_OUTPUT ps_datm0(PS_INPUT input)
void ps_datm0(PS_INPUT input)
{
PS_OUTPUT output;
clip(127.5f / 255 - sample_c(input.t).a); // < 0x80 pass (== 0x80 should not pass)
output.c = 0;
return output;
}
PS_OUTPUT ps_datm1_rta_correction(PS_INPUT input)
void ps_datm1_rta_correction(PS_INPUT input)
{
PS_OUTPUT output;
clip(sample_c(input.t).a - 254.5f / 255); // >= 0x80 pass
output.c = 0;
return output;
}
PS_OUTPUT ps_datm0_rta_correction(PS_INPUT input)
void ps_datm0_rta_correction(PS_INPUT input)
{
PS_OUTPUT output;
clip(254.5f / 255 - sample_c(input.t).a); // < 0x80 pass (== 0x80 should not pass)
output.c = 0;
return output;
}
PS_OUTPUT ps_rta_correction(PS_INPUT input)

View File

@@ -104,7 +104,6 @@ void ps_datm1()
{
if(sample_c(v_tex).a < (127.5f / 255.0f)) // >= 0x80 pass
discard;
}
#endif

View File

@@ -3045,19 +3045,19 @@ void GSRendererHW::Draw()
if (scale_draw == 1)
{
target_scale = 1.0f;
m_downscale_source = src->m_from_target->GetScale() > 1.0f;
m_downscale_source = src->m_from_target ? src->m_from_target->GetScale() > 1.0f : false;
}
else
m_downscale_source = GSConfig.UserHacks_NativeScaling != GSNativeScaling::Aggressive ? false : src->m_from_target->GetScale() > 1.0f; // Bad for GTA + Full Spectrum Warrior, good for Sacred Blaze + Parappa.
m_downscale_source = (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Aggressive || !src->m_from_target) ? false : src->m_from_target->GetScale() > 1.0f; // Bad for GTA + Full Spectrum Warrior, good for Sacred Blaze + Parappa.
}
else
{
// if it's directly copying keep the scale - Ratchet and clank hits this, stops edge garbage happening.
// Keep it to small targets of 256 or lower.
if (scale_draw == -1 && src && src->m_from_target && src->m_from_target->m_downscaled && ((static_cast<int>(m_cached_ctx.FRAME.FBW * 64) <= (PCRTCDisplays.GetResolution().x >> 1) &&
if (scale_draw == -1 && src && (!src->m_from_target || (src->m_from_target && src->m_from_target->m_downscaled)) && ((static_cast<int>(m_cached_ctx.FRAME.FBW * 64) <= (PCRTCDisplays.GetResolution().x >> 1) &&
(GSVector4i(m_vt.m_min.p).xyxy() == GSVector4i(m_vt.m_min.t).xyxy()).alltrue() && (GSVector4i(m_vt.m_max.p).xyxy() == GSVector4i(m_vt.m_max.t).xyxy()).alltrue()) || possible_shuffle))
{
target_scale = src->m_from_target->GetScale();
target_scale = src->m_from_target ? src->m_from_target->GetScale() : 1.0f;
scale_draw = 1;
scaled_copy = true;
}
@@ -3073,7 +3073,7 @@ void GSRendererHW::Draw()
// This upscaling hack is for games which construct P8 textures by drawing a bunch of small sprites in C32,
// then reinterpreting it as P8. We need to keep the off-screen intermediate textures at native resolution,
// but not propagate that through to the normal render targets. Test Case: Crash Wrath of Cortex.
if (no_ds && src && !m_channel_shuffle && src->m_from_target && (GSConfig.UserHacks_NativePaletteDraw || (src->m_from_target->m_downscaled && scale_draw <= 1)) &&
if (no_ds && src && !m_channel_shuffle && src->m_from_target && (GSConfig.UserHacks_NativePaletteDraw || (src->m_target_direct && src->m_from_target->m_downscaled && scale_draw <= 1)) &&
src->m_scale == 1.0f && (src->m_TEX0.PSM == PSMT8 || src->m_TEX0.TBP0 == m_cached_ctx.FRAME.Block()))
{
GL_CACHE("HW: Using native resolution for target based on texture source");
@@ -3405,7 +3405,7 @@ void GSRendererHW::Draw()
return;
}
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src, possible_shuffle), (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale,
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src, possible_shuffle), (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? ((src && src->m_from_target) ? src->m_from_target->GetScale() : (ds ? ds->m_scale : 1.0f)) : target_scale,
GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src);
if (!rt) [[unlikely]]
@@ -4039,7 +4039,8 @@ void GSRendererHW::Draw()
bool valid_width_change = false;
if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM) && !m_in_target_draw)
{
valid_width_change = rt->m_TEX0.TBW != FRAME_TEX0.TBW;
const u32 frame_mask = (m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk);
valid_width_change = rt->m_TEX0.TBW != FRAME_TEX0.TBW && (frame_mask != (frame_psm.fmsk & 0x00FFFFFF) || rt->m_valid_rgb == false);
if (valid_width_change && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000))
{
// Alpha could be a font, and since the width is changing it's no longer valid.
@@ -8974,6 +8975,9 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual()
int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps)
{
if (GSConfig.UserHacks_NativeScaling == GSNativeScaling::Off)
return 0;
const GSVector2i draw_size = GSVector2i(m_vt.m_max.p.x - m_vt.m_min.p.x, m_vt.m_max.p.y - m_vt.m_min.p.y);
GSVector2i tex_size = GSVector2i(m_vt.m_max.t.x - m_vt.m_min.t.x, m_vt.m_max.t.y - m_vt.m_min.t.y);
@@ -8984,10 +8988,13 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps)
// Try to catch cases of stupid draws like Manhunt and Syphon Filter where they sample a single pixel.
// Also make sure it's grabbing most of the texture.
if (tex_size.x == 0 || tex_size.y == 0 || draw_size.x == 0 || draw_size.y == 0 || !is_target_src)
if (tex_size.x == 0 || tex_size.y == 0 || draw_size.x == 0 || draw_size.y == 0)
return 0;
if (is_target_src && src->m_from_target->m_downscaled && std::abs(draw_size.x - tex_size.x) <= 1 && std::abs(draw_size.y - tex_size.y) <= 1)
const bool no_resize = (std::abs(draw_size.x - tex_size.x) <= 1 && std::abs(draw_size.y - tex_size.y) <= 1);
const bool can_maintain = no_resize || (!is_target_src && m_index.tail == 2);
if (!src || ((!is_target_src || src->m_from_target->m_downscaled) && can_maintain))
return -1;
const GSDrawingContext& next_ctx = m_env.CTXT[m_env.PRIM.CTXT];
@@ -9000,8 +9007,9 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps)
const bool is_downscale = m_cached_ctx.TEX0.TBW >= m_cached_ctx.FRAME.FBW && draw_size.x <= (tex_size.x * 0.75f) && draw_size.y <= (tex_size.y * 0.75f);
// Check we're getting most of the texture and not just stenciling a part of it.
// Only allow non-bilineared downscales if it's most of the target (misdetections of shadows in Naruto, Transformers etc), otherwise it's fine.
const GSVector2i tex_size_half = GSVector2i((src->GetRegion().HasX() ? src->GetRegionSize().x : src->m_from_target->m_valid.width()) / 2, (src->GetRegion().HasY() ? src->GetRegionSize().y : src->m_from_target->m_valid.height()) / 2);
const bool possible_downscale = m_context->TEX1.MMAG == 1 || src->m_from_target->m_downscaled || tex_size.x >= tex_size_half.x || tex_size.y >= tex_size_half.y;
const GSVector4i src_valid = src->m_from_target ? src->m_from_target->m_valid : src->m_valid_rect;
const GSVector2i tex_size_half = GSVector2i((src->GetRegion().HasX() ? src->GetRegionSize().x : src_valid.width()) / 2, (src->GetRegion().HasY() ? src->GetRegionSize().y : src_valid.height()) / 2);
const bool possible_downscale = m_context->TEX1.MMIN == 1 || !src->m_from_target || src->m_from_target->m_downscaled || tex_size.x >= tex_size_half.x || tex_size.y >= tex_size_half.y;
if (is_downscale && (draw_size.x >= PCRTCDisplays.GetResolution().x || !possible_downscale))
return 0;
@@ -9019,6 +9027,39 @@ int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps)
return is_upscale ? 2 : 1;
}
// Last-ditch check: see if it's doing a lot of small, identical draws, which could be recursive lighting bloom.
if (m_vt.m_primclass == GS_SPRITE_CLASS && m_index.tail > 2 && !no_gaps_or_single_sprite && m_context->TEX1.MMAG == 1 && !m_context->ALPHA.IsOpaque())
{
GSVertex* v = &m_vertex.buff[0];
float tw = 1 << src->m_TEX0.TW;
float th = 1 << src->m_TEX0.TH;
const int first_u = (PRIM->FST) ? (v[1].U - v[0].U) >> 4 : std::floor(static_cast<int>(tw * v[1].ST.S) - static_cast<int>(tw * v[0].ST.S));
const int first_v = (PRIM->FST) ? (v[1].V - v[0].V) >> 4 : std::floor(static_cast<int>(th * v[1].ST.T) - static_cast<int>(th * v[0].ST.T));
const int first_x = (v[1].XYZ.X - v[0].XYZ.X) >> 4;
const int first_y = (v[1].XYZ.Y - v[0].XYZ.Y) >> 4;
if (first_x > first_u && first_y > first_v && !no_resize && std::abs(draw_size.x - first_x) <= 4 && std::abs(draw_size.y - first_y) <= 4)
{
for (u32 i = 2; i < m_index.tail; i += 2)
{
const int next_u = (PRIM->FST) ? (v[i + 1].U - v[i].U) >> 4 : std::floor(static_cast<int>(tw * v[i + 1].ST.S) - static_cast<int>(tw * v[i].ST.S));
const int next_v = (PRIM->FST) ? (v[i + 1].V - v[i].V) >> 4 : std::floor(static_cast<int>(th * v[i + 1].ST.T) - static_cast<int>(th * v[i].ST.T));
const int next_x = (v[i + 1].XYZ.X - v[i].XYZ.X) >> 4;
const int next_y = (v[i + 1].XYZ.Y - v[i].XYZ.Y) >> 4;
if (std::abs(draw_size.x - next_x) > 4 || std::abs(draw_size.y - next_y) > 4)
break;
if (next_u != first_u || next_v != first_v || next_x != first_x || next_y != first_y)
break;
if (i + 2 >= m_index.tail)
return 2;
}
}
}
return 0;
}

View File

@@ -424,7 +424,7 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw
}
}
}
else // Widths match
else if (!block_offset) // Widths match
{
const int horizontal_dst_page_offset = page_offset % clamped_tbw;
const int vertical_dst_page_offset = page_offset / clamped_tbw;
@@ -1626,14 +1626,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
{
// It is complex to do this conversion in a shader. As a reference, let's do it on the CPU;
// it will be slow but can work even with upscaling. Also fine-tune it so it's not enabled when not needed.
if (psm == PSMT4 || (GSConfig.UserHacks_CPUFBConversion && psm == PSMT8 && (!possible_shuffle || GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp != 32)) ||
(psm == PSMT8H && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 16))
if (psm == PSMT4 || (psm == PSMT8H && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 16))
{
// Forces 4-bit and 8-bit frame buffer conversion to be done on the CPU instead of the GPU, but performance will be slower.
// There is no dedicated shader to handle 4-bit conversion (Beyond Good and Evil and Stuntman).
// Enable readbacks on PSMT4 as we don't have a dedicated shader (Beyond Good and Evil and Stuntman).
// Enable readbacks on PSMT8H 16bit as we don't have a dedicated shader (History Channel - Battle for the Pacific, Sea World - Shamu's Big Adventure).
// Note: Stuntman no longer hits the PSMT4 code path.
// Direct3D10/11 and OpenGL support 8-bit fb conversion but don't render some corner cases properly (Harry Potter games).
// The hack can fix glitches in some games.
// Note2: Harry Potter is now properly handled with shader conversion and no need to enable frame buffer conversion.
if (!t->m_drawn_since_read.rempty())
{
t->UnscaleRTAlpha();

View File

@@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 66;
static constexpr u32 SHADER_CACHE_VERSION = 67;