Compare commits

...

5 Commits

Author SHA1 Message Date
refractionpcsx2
4d37e35675 GS/TC: Delete dirty rt's in src lookup + usert in rt on 3 draw old rt's 2025-05-18 11:13:35 +02:00
refractionpcsx2
df3868a280 GS/HW: Avoid target height mistakes on shuffles + Update new src == rt 2025-05-18 11:13:35 +02:00
lightningterror
0799bb8cf1 GS/DX: DX requires a copy to sample the depth buffer. 2025-05-17 22:54:04 +02:00
lightningterror
69048dede4 GS/DX11: Merge CloneTexture with CopyRect.
Unified between renderers, easier to make shared changes.
2025-05-17 22:54:04 +02:00
lightningterror
76df6d1f43 GS/GL: Check for texture creation hazard for fb copy. 2025-05-17 22:54:04 +02:00
7 changed files with 152 additions and 66 deletions

View File

@@ -1236,36 +1236,18 @@ void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r,
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
D3D11_BOX box = {(UINT)r.left, (UINT)r.top, 0U, (UINT)r.right, (UINT)r.bottom, 1U};
D3D11_BOX box = {static_cast<UINT>(r.left), static_cast<UINT>(r.top), 0U, static_cast<UINT>(r.right), static_cast<UINT>(r.bottom), 1U};
// DX api isn't happy if we pass a box for depth copy
// It complains that depth/multisample must be a full copy
// and asks us to use a NULL for the box
// DX11 doesn't support partial depth copy so we need to
// either pass a nullptr D3D11_BOX for a full depth copy or use CopyResource instead.
// Alternatively use shader copy StretchRect, or full depth copy with
// adjusting the scissor and UVs in the shader.
const bool depth = (sTex->GetType() == GSTexture::Type::DepthStencil);
auto pBox = depth ? nullptr : &box;
const u32 x = depth ? 0 : destX;
const u32 y = depth ? 0 : destY;
m_ctx->CopySubresourceRegion(*(GSTexture11*)dTex, 0, destX, destY, 0, *(GSTexture11*)sTex, 0, pBox);
}
void GSDevice11::CloneTexture(GSTexture* src, GSTexture** dest, const GSVector4i& rect)
{
pxAssertMsg(src->GetType() == GSTexture::Type::DepthStencil || src->GetType() == GSTexture::Type::RenderTarget, "Source is RT or DS.");
CommitClear(src);
const int w = src->GetWidth();
const int h = src->GetHeight();
if (src->GetType() == GSTexture::Type::DepthStencil)
{
// DX11 requires that you copy the entire depth buffer.
*dest = CreateDepthStencil(w, h, src->GetFormat(), false);
CopyRect(src, *dest, GSVector4i(0, 0, w, h), 0, 0);
}
else
{
*dest = CreateRenderTarget(w, h, src->GetFormat(), false);
CopyRect(src, *dest, rect, rect.left, rect.top);
}
m_ctx->CopySubresourceRegion(*(GSTexture11*)dTex, 0, x, y, 0, *(GSTexture11*)sTex, 0, pBox);
}
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
@@ -2630,20 +2612,33 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
PSSetShaderResource(1, config.pal);
}
GSTexture* rt_copy = nullptr;
if (config.require_one_barrier || (config.tex && config.tex == config.rt)) // Used as "bind rt" flag when texture barrier is unsupported.
GSTexture* draw_rt_clone = nullptr;
if (config.require_one_barrier || (config.tex && config.tex == config.rt))
{
// Bind the RT.This way special effect can use it.
// Do not always bind the rt when it's not needed,
// only bind it when effects use it such as fbmask emulation currently
// because we copy the frame buffer and it is quite slow.
CloneTexture(colclip_rt ? colclip_rt : config.rt, &rt_copy, config.drawarea);
if (rt_copy)
// Requires a copy of the RT.
// Used as "bind rt" flag when texture barrier is unsupported for tex is fb.
draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, colclip_rt ? GSTexture::Format::ColorClip : GSTexture::Format::Color, true);
if (draw_rt_clone)
{
CopyRect(colclip_rt ? colclip_rt : config.rt, draw_rt_clone, config.drawarea, config.drawarea.left, config.drawarea.top);
if (config.require_one_barrier)
PSSetShaderResource(2, rt_copy);
PSSetShaderResource(2, draw_rt_clone);
if (config.tex && config.tex == config.rt)
PSSetShaderResource(0, rt_copy);
PSSetShaderResource(0, draw_rt_clone);
}
}
GSTexture* draw_ds_clone = nullptr;
if (config.tex && config.tex == config.ds)
{
// DX requires a copy when sampling the depth buffer.
draw_ds_clone = CreateDepthStencil(rtsize.x, rtsize.y, config.ds->GetFormat(), false);
if (draw_ds_clone)
{
CopyRect(config.ds, draw_ds_clone, config.drawarea, config.drawarea.left, config.drawarea.top);
PSSetShaderResource(0, draw_ds_clone);
}
}
@@ -2697,8 +2692,12 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
DrawIndexedPrimitive();
}
if (rt_copy)
Recycle(rt_copy);
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (draw_ds_clone)
Recycle(draw_ds_clone);
if (primid_tex)
Recycle(primid_tex);

View File

@@ -291,7 +291,6 @@ public:
std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format) override;
void CommitClear(GSTexture* t);
void CloneTexture(GSTexture* src, GSTexture** dest, const GSVector4i& rect);
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) override;

View File

@@ -3828,6 +3828,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
GSTexture12* draw_rt_clone = nullptr;
GSTexture12* draw_ds_clone = nullptr;
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
@@ -3883,7 +3884,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
// bind textures before checking the render pass, in case we need to transition them
if (config.tex)
{
PSSetShaderResource(0, config.tex, config.tex != config.rt);
PSSetShaderResource(0, config.tex, config.tex != config.rt && config.tex != config.ds);
PSSetSampler(config.sampler);
}
if (config.pal)
@@ -3907,15 +3908,16 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
}
}
if (config.require_one_barrier || (config.tex && config.tex == config.rt)) // Used as "bind rt" flag when texture barrier is unsupported.
if (config.require_one_barrier || (config.tex && config.tex == config.rt))
{
// requires a copy of the RT
// Requires a copy of the RT.
// Used as "bind rt" flag when texture barrier is unsupported for tex is fb.
draw_rt_clone = static_cast<GSTexture12*>(CreateTexture(rtsize.x, rtsize.y, 1, colclip_rt ? GSTexture::Format::ColorClip : GSTexture::Format::Color, true));
if (draw_rt_clone)
{
EndRenderPass();
GL_PUSH("Copy RT to temp texture for fbmask {%d,%d %dx%d}", config.drawarea.left, config.drawarea.top,
GL_PUSH("Copy RT to temp texture {%d,%d %dx%d}", config.drawarea.left, config.drawarea.top,
config.drawarea.width(), config.drawarea.height());
draw_rt_clone->SetState(GSTexture::State::Invalidated);
@@ -3927,6 +3929,23 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
}
}
if (config.tex && config.tex == config.ds)
{
// DX requires a copy when sampling the depth buffer.
draw_ds_clone = static_cast<GSTexture12*>(CreateDepthStencil(rtsize.x, rtsize.y, config.ds->GetFormat(), false));
if (draw_ds_clone)
{
EndRenderPass();
GL_PUSH("Copy RT to temp texture {%d,%d %dx%d}", config.drawarea.left, config.drawarea.top,
config.drawarea.width(), config.drawarea.height());
draw_ds_clone->SetState(GSTexture::State::Invalidated);
CopyRect(config.ds, draw_ds_clone, config.drawarea, config.drawarea.left, config.drawarea.top);
PSSetShaderResource(0, draw_ds_clone, true);
}
}
// Switch to colclip target for colclip hw rendering
if (pipe.ps.colclip_hw)
{
@@ -4073,6 +4092,9 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
if (draw_rt_clone)
Recycle(draw_rt_clone);
if (draw_ds_clone)
Recycle(draw_ds_clone);
if (date_image)
Recycle(date_image);

View File

@@ -1037,7 +1037,7 @@ float GSRendererHW::GetTextureScaleFactor()
return GetUpscaleMultiplier();
}
GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex)
GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex, const bool is_shuffle)
{
// Don't blindly expand out to the scissor size if we're not drawing to it.
// e.g. Burnout 3, God of War II, etc.
@@ -1088,10 +1088,9 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex)
// Early detection of texture shuffles. These double the input height because they're interpreting 64x32 C32 pages as 64x64 C16.
// Why? Well, we don't want to be doubling the heights of targets, but also we don't want to align C32 targets to 64 instead of 32.
// Yumeria's text breaks, and GOW goes to 512x448 instead of 512x416 if we don't.
const bool possible_texture_shuffle =
(tex && m_vt.m_primclass == GS_SPRITE_CLASS && frame_psm.bpp == 16 &&
const bool possible_texture_shuffle = tex && m_vt.m_primclass == GS_SPRITE_CLASS && frame_psm.bpp == 16 &&
GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 &&
(tex->m_32_bits_fmt ||
(is_shuffle || (tex->m_32_bits_fmt ||
(m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block() && IsOpaque() && !(m_context->TEX1.MMIN & 1) &&
m_cached_ctx.FRAME.FBMSK && g_texture_cache->Has32BitTarget(m_cached_ctx.FRAME.Block()))));
if (possible_texture_shuffle)
@@ -1128,9 +1127,9 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex)
return GSVector2i(width, height);
}
GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex, const bool can_expand)
GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex, const bool can_expand, const bool is_shuffle)
{
const GSVector2i valid_size = GetValidSize(tex);
const GSVector2i valid_size = GetValidSize(tex, is_shuffle);
return g_texture_cache->GetTargetSize(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, valid_size.x, valid_size.y, can_expand);
}
@@ -2942,7 +2941,13 @@ void GSRendererHW::Draw()
if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp != 16)
possible_shuffle &= draw_uses_target;
possible_shuffle &= src && (src->m_from_target != nullptr || (m_skip && possible_shuffle));
const bool shuffle_source = src && (src->m_from_target != nullptr || (m_skip && possible_shuffle));
if (!shuffle_source)
{
if(draw_start > src->m_TEX0.TBP0 || draw_end < src->m_TEX0.TBP0)
possible_shuffle &= src && (src->m_from_target != nullptr || (m_skip && possible_shuffle));
}
// We don't know the alpha range of direct sources when we first tried to optimize the alpha test.
// Moving the texture lookup before the ATST optimization complicates things a lot, so instead,
// recompute it, and everything derived from it again if it changes.
@@ -2988,7 +2993,7 @@ void GSRendererHW::Draw()
const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && output_black);
// Estimate size based on the scissor rectangle and height cache.
GSVector2i t_size = GetTargetSize(src, can_expand);
GSVector2i t_size = GetTargetSize(src, can_expand, possible_shuffle);
const GSVector4i t_size_rect = GSVector4i::loadh(t_size);
// Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area.
@@ -3077,7 +3082,7 @@ void GSRendererHW::Draw()
if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP)
{
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src, possible_shuffle), target_scale, GSTextureCache::DepthStencil,
true, 0, false, force_preload, preserve_depth, m_r, src);
if (!ds) [[unlikely]]
{
@@ -3228,7 +3233,7 @@ void GSRendererHW::Draw()
return;
}
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale,
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src, possible_shuffle), (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale,
GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src);
if (!rt) [[unlikely]]
@@ -3242,6 +3247,49 @@ void GSRendererHW::Draw()
{
rt->UpdateValidity(GSVector4i::loadh(GSVector2i(GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x, GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.y)), true);
}
if (src && !src->m_from_target && GSLocalMemory::m_psm[src->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp &&
(GSUtil::GetChannelMask(src->m_TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM)) != 0)
{
const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1;
const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
if (draw_start <= src->m_TEX0.TBP0 && draw_end > src->m_TEX0.TBP0)
{
g_texture_cache->ReplaceSourceTexture(src, rt->GetTexture(), rt->GetScale(), rt->GetUnscaledSize(), nullptr, true);
src->m_from_target = rt;
src->m_from_target_TEX0 = rt->m_TEX0;
src->m_target_direct = true;
src->m_shared_texture = true;
src->m_target = true;
src->m_texture = rt->m_texture;
src->m_32_bits_fmt = rt->m_32_bits_fmt;
src->m_valid_rect = rt->m_valid;
src->m_alpha_minmax.first = rt->m_alpha_min;
src->m_alpha_minmax.second = rt->m_alpha_max;
const int target_width = std::max(FRAME_TEX0.TBW, 1U);
const int page_offset = (src->m_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5;
const int vertical_page_offset = page_offset / target_width;
const int horizontal_page_offset = page_offset - (vertical_page_offset * target_width);
if (vertical_page_offset)
{
const int height = std::max(rt->m_valid.w, possible_shuffle ? (m_r.w / 2) : m_r.w);
src->m_region.SetY(vertical_page_offset * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y, height);
}
if (horizontal_page_offset)
src->m_region.SetX(horizontal_page_offset * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.x, target_width * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.x);
if (rt->m_dirty.empty())
{
RGBAMask rgba_mask;
rgba_mask._u32 = GSUtil::GetChannelMask(rt->m_TEX0.PSM);
g_texture_cache->AddDirtyRectTarget(rt, m_r, FRAME_TEX0.PSM, FRAME_TEX0.TBW, rgba_mask, GSLocalMemory::m_psm[FRAME_TEX0.PSM].trbpp >= 16);
}
}
}
}
else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block())
{
@@ -3539,7 +3587,7 @@ void GSRendererHW::Draw()
// This should never happen, but just to be safe..
if (!ds)
{
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src, possible_shuffle), target_scale, GSTextureCache::DepthStencil,
true, 0, false, force_preload, preserve_depth, m_r, src);
if (!ds) [[unlikely]]
{

View File

@@ -222,8 +222,8 @@ public:
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
void MergeSprite(GSTextureCache::Source* tex);
float GetTextureScaleFactor() override;
GSVector2i GetValidSize(const GSTextureCache::Source* tex = nullptr);
GSVector2i GetTargetSize(const GSTextureCache::Source* tex = nullptr, const bool can_expand = true);
GSVector2i GetValidSize(const GSTextureCache::Source* tex = nullptr, const bool is_shuffle = false);
GSVector2i GetTargetSize(const GSTextureCache::Source* tex = nullptr, const bool can_expand = true, const bool is_shuffle = false);
void Reset(bool hardware_reset) override;
void UpdateSettings(const Pcsx2Config::GSOptions& old_config) override;

View File

@@ -1408,16 +1408,31 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (rect_clean)
{
bool can_use = true;
for (auto& dirty : t->m_dirty)
{
const GSVector4i dirty_rect = dirty.GetDirtyRect(t->m_TEX0, t->m_TEX0.PSM != dirty.psm);
if (!dirty_rect.rintersect(new_rect).rempty())
{
rect_clean = false;
partial |= !new_rect.rintersect(dirty_rect).eq(new_rect) || dirty_rect.eq(new_rect);
if(!dirty_rect.rintersect(t->m_valid).eq(t->m_valid) || GSUtil::GetChannelMask(t->m_TEX0.PSM) != t->m_dirty.GetDirtyChannels())
partial |= !new_rect.rintersect(dirty_rect).eq(new_rect) || dirty_rect.eq(new_rect);
else // Nothing is valid anymore, kill it.
{
can_use = false;
}
break;
}
}
if (!can_use)
{
InvalidateSourcesFromTarget(t);
i = list.erase(i);
delete t;
continue;
}
}
const u32 channel_mask = GSUtil::GetChannelMask(psm);
@@ -2340,11 +2355,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
lookup_rect = lookup_rect & GSVector4i(~8);
const GSVector4i translated_rect = GSVector4i(0, 0, 0, 0).max_i32(TranslateAlignedRectByPage(t, TEX0.TBP0, TEX0.PSM, TEX0.TBW, lookup_rect));
const GSVector4i dirty_rect = t->m_dirty.empty() ? GSVector4i::zero() : t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(t->m_valid);
const GSVector4i dirty_rect = t->m_dirty.empty() ? GSVector4i::zero() : t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size);
const bool all_dirty = dirty_rect.eq(t->m_valid);
if (!is_shuffle && !t->m_dirty.empty() && (!preserve_alpha && !preserve_rgb) && (GSState::s_n - 1) != t->m_last_draw)
if (!is_shuffle && !dirty_rect.rempty() && (!preserve_alpha && !preserve_rgb) && (GSState::s_n - 3) > t->m_last_draw)
{
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to dirty areas not preserved (Likely change in target)", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t);
@@ -2354,7 +2369,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
continue;
}
if (!all_dirty && ((translated_rect.w <= t->m_valid.w) || widthpage_offset == 0 || (GSState::s_n - 1) == t->m_last_draw))
if (!all_dirty && ((translated_rect.w <= t->m_valid.w) || widthpage_offset == 0 || (GSState::s_n - 3) <= t->m_last_draw))
{
if (TEX0.TBW == t->m_TEX0.TBW && !is_shuffle && widthpage_offset == 0 && ((min_rect.w + 63) / 64) > 1)
{

View File

@@ -2489,12 +2489,15 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
if (config.require_one_barrier && !m_features.texture_barrier)
{
// Requires a copy of the RT
// Requires a copy of the RT.
draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, colclip_rt ? GSTexture::Format::ColorClip : GSTexture::Format::Color, true);
GL_PUSH("Copy RT to temp texture for fbmask {%d,%d %dx%d}",
config.drawarea.left, config.drawarea.top,
config.drawarea.width(), config.drawarea.height());
CopyRect(colclip_rt ? colclip_rt : config.rt, draw_rt_clone, config.drawarea, config.drawarea.left, config.drawarea.top);
if (draw_rt_clone)
{
GL_PUSH("GL: Copy RT to temp texture {%d,%d %dx%d}",
config.drawarea.left, config.drawarea.top,
config.drawarea.width(), config.drawarea.height());
CopyRect(colclip_rt ? colclip_rt : config.rt, draw_rt_clone, config.drawarea, config.drawarea.left, config.drawarea.top);
}
}
IASetVertexBuffer(config.verts, config.nverts);
@@ -2563,7 +2566,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
if (check_barrier && ((config.tex && (config.tex == config.ds || config.tex == config.rt)) || ((psel.ps.IsFeedbackLoop() || psel.ps.blend_c == 1) && GLState::rt == config.rt)))
{
// Ensure all depth writes are finished before sampling
GL_INS("Texture barrier to flush depth or rt before reading");
GL_INS("GL: Texture barrier to flush depth or rt before reading");
glTextureBarrier();
}
// additional non-pipeline config stuff