GPU/HW: Handle redundant texture window updates

Significantly reduces draw count in THPS2.

Draw calls drop from ~150 per frame to ~70.
This commit is contained in:
Stenzek 2024-10-05 13:06:45 +10:00
parent 52389f48a6
commit 71bb953253
No known key found for this signature in database
5 changed files with 21 additions and 24 deletions

View File

@ -356,7 +356,6 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
if (sw.IsReading())
{
m_draw_mode.texture_page_changed = true;
m_draw_mode.texture_window_changed = true;
m_drawing_area_changed = true;
SetClampedDrawingArea();
UpdateDMARequest();
@ -1584,8 +1583,6 @@ void GPU::SetTextureWindow(u32 value)
if (m_draw_mode.texture_window_value == value)
return;
FlushRender();
const u8 mask_x = Truncate8(value & UINT32_C(0x1F));
const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F));
const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F));
@ -1597,7 +1594,6 @@ void GPU::SetTextureWindow(u32 value)
m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
m_draw_mode.texture_window_value = value;
m_draw_mode.texture_window_changed = true;
}
void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit)

View File

@ -478,15 +478,10 @@ protected:
bool texture_x_flip;
bool texture_y_flip;
bool texture_page_changed;
bool texture_window_changed;
// Returns the current value of the texture_page_changed flag.
ALWAYS_INLINE bool IsTexturePageChanged() const { return texture_page_changed; }
// Raises the texture_page_changed flag.
ALWAYS_INLINE void SetTexturePageChanged() { texture_page_changed = true; }
// Resets the texture_page_changed flag to false.
ALWAYS_INLINE void ClearTexturePageChangedFlag() { texture_page_changed = false; }
// Returns the current value of the texture_window_changed flag.
ALWAYS_INLINE bool IsTextureWindowChanged() const { return texture_window_changed; }
// Raises the texture_window_changed flag.
ALWAYS_INLINE void SetTextureWindowChanged() { texture_window_changed = true; }
// Resets the texture_window_changed flag to false.
ALWAYS_INLINE void ClearTextureWindowChangedFlag() { texture_window_changed = false; }
} m_draw_mode = {};
GPUDrawingArea m_drawing_area = {};

View File

@ -3634,7 +3634,7 @@ void GPU_HW::DispatchRenderCommand()
{
if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode ||
(transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) ||
dithering_enable != m_batch.dithering ||
dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window ||
(texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key))
{
FlushRender();
@ -3684,17 +3684,12 @@ void GPU_HW::DispatchRenderCommand()
m_batch.dithering = dithering_enable;
m_batch.texture_cache_key = texture_cache_key;
if (m_draw_mode.IsTextureWindowChanged())
if (m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window)
{
m_draw_mode.ClearTextureWindowChangedFlag();
m_batch_ubo_data.u_texture_window[0] = ZeroExtend32(m_draw_mode.texture_window.and_x);
m_batch_ubo_data.u_texture_window[1] = ZeroExtend32(m_draw_mode.texture_window.and_y);
m_batch_ubo_data.u_texture_window[2] = ZeroExtend32(m_draw_mode.texture_window.or_x);
m_batch_ubo_data.u_texture_window[3] = ZeroExtend32(m_draw_mode.texture_window.or_y);
m_texture_window_active = ((m_draw_mode.texture_window.and_x & m_draw_mode.texture_window.and_y) != 0xFF ||
((m_draw_mode.texture_window.or_x | m_draw_mode.texture_window.or_y) != 0));
m_batch_ubo_data.u_texture_window_bits = m_draw_mode.texture_window;
m_texture_window_active = (m_draw_mode.texture_window != GPUTextureWindow{0xFF, 0xFF, 0x00, 0x00});
GSVector4i::store<true>(&m_batch_ubo_data.u_texture_window[0],
GSVector4i::load32(&m_draw_mode.texture_window).u8to32());
m_batch_ubo_dirty = true;
}

View File

@ -113,7 +113,7 @@ private:
void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
};
struct BatchConfig
struct alignas(4) BatchConfig
{
BatchTextureMode texture_mode = BatchTextureMode::Disabled;
GPUTransparencyMode transparency_mode = GPUTransparencyMode::Disabled;
@ -130,7 +130,7 @@ private:
BatchRenderMode GetRenderMode() const;
};
struct BatchUBOData
struct alignas(VECTOR_ALIGNMENT) BatchUBOData
{
u32 u_texture_window[4]; // and_x, and_y, or_x, or_y
float u_src_alpha_factor;
@ -140,7 +140,7 @@ private:
float u_resolution_scale;
float u_rcp_resolution_scale;
float u_resolution_scale_minus_one;
u32 pad;
GPUTextureWindow u_texture_window_bits; // not actually used on GPU
};
struct RendererStats
@ -302,10 +302,10 @@ private:
u8 m_texpage_dirty = 0;
bool m_batch_ubo_dirty = true;
BatchConfig m_batch;
// Changed state
bool m_batch_ubo_dirty = true;
BatchUBOData m_batch_ubo_data = {};
// Bounding box of VRAM area that the GPU has drawn into.

View File

@ -10,6 +10,7 @@
#include "common/gsvector.h"
#include <array>
#include <string>
enum : u32
{
@ -229,6 +230,16 @@ struct GPUTextureWindow
u8 and_y;
u8 or_x;
u8 or_y;
// Equality: true when both windows have identical object representations.
// NOTE(review): a raw byte compare assumes the struct contains no padding
// bytes - confirm against the full struct definition.
ALWAYS_INLINE bool operator==(const GPUTextureWindow& rhs) const
{
  const int byte_diff = std::memcmp(this, &rhs, sizeof(GPUTextureWindow));
  return (byte_diff == 0);
}
// Inequality: the exact negation of operator==, i.e. true when the two
// windows differ in at least one byte.
ALWAYS_INLINE bool operator!=(const GPUTextureWindow& rhs) const
{
  return !(*this == rhs);
}
};
ALWAYS_INLINE static constexpr u32 VRAMPageIndex(u32 px, u32 py)