mirror of
https://github.com/stenzek/duckstation.git
synced 2024-11-23 13:59:49 +00:00
GPU/HW: Handle redundant texture window updates
Significantly reduces the draw count in THPS2: from ~150 draw calls per frame to ~70.
This commit is contained in:
parent
52389f48a6
commit
71bb953253
@ -356,7 +356,6 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
|
||||
if (sw.IsReading())
|
||||
{
|
||||
m_draw_mode.texture_page_changed = true;
|
||||
m_draw_mode.texture_window_changed = true;
|
||||
m_drawing_area_changed = true;
|
||||
SetClampedDrawingArea();
|
||||
UpdateDMARequest();
|
||||
@ -1584,8 +1583,6 @@ void GPU::SetTextureWindow(u32 value)
|
||||
if (m_draw_mode.texture_window_value == value)
|
||||
return;
|
||||
|
||||
FlushRender();
|
||||
|
||||
const u8 mask_x = Truncate8(value & UINT32_C(0x1F));
|
||||
const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F));
|
||||
const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F));
|
||||
@ -1597,7 +1594,6 @@ void GPU::SetTextureWindow(u32 value)
|
||||
m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
|
||||
m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
|
||||
m_draw_mode.texture_window_value = value;
|
||||
m_draw_mode.texture_window_changed = true;
|
||||
}
|
||||
|
||||
void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit)
|
||||
|
@ -478,15 +478,10 @@ protected:
|
||||
bool texture_x_flip;
|
||||
bool texture_y_flip;
|
||||
bool texture_page_changed;
|
||||
bool texture_window_changed;
|
||||
|
||||
ALWAYS_INLINE bool IsTexturePageChanged() const { return texture_page_changed; }
|
||||
ALWAYS_INLINE void SetTexturePageChanged() { texture_page_changed = true; }
|
||||
ALWAYS_INLINE void ClearTexturePageChangedFlag() { texture_page_changed = false; }
|
||||
|
||||
ALWAYS_INLINE bool IsTextureWindowChanged() const { return texture_window_changed; }
|
||||
ALWAYS_INLINE void SetTextureWindowChanged() { texture_window_changed = true; }
|
||||
ALWAYS_INLINE void ClearTextureWindowChangedFlag() { texture_window_changed = false; }
|
||||
} m_draw_mode = {};
|
||||
|
||||
GPUDrawingArea m_drawing_area = {};
|
||||
|
@ -3634,7 +3634,7 @@ void GPU_HW::DispatchRenderCommand()
|
||||
{
|
||||
if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode ||
|
||||
(transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) ||
|
||||
dithering_enable != m_batch.dithering ||
|
||||
dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window ||
|
||||
(texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key))
|
||||
{
|
||||
FlushRender();
|
||||
@ -3684,17 +3684,12 @@ void GPU_HW::DispatchRenderCommand()
|
||||
m_batch.dithering = dithering_enable;
|
||||
m_batch.texture_cache_key = texture_cache_key;
|
||||
|
||||
if (m_draw_mode.IsTextureWindowChanged())
|
||||
if (m_batch_ubo_data.u_texture_window_bits != m_draw_mode.texture_window)
|
||||
{
|
||||
m_draw_mode.ClearTextureWindowChangedFlag();
|
||||
|
||||
m_batch_ubo_data.u_texture_window[0] = ZeroExtend32(m_draw_mode.texture_window.and_x);
|
||||
m_batch_ubo_data.u_texture_window[1] = ZeroExtend32(m_draw_mode.texture_window.and_y);
|
||||
m_batch_ubo_data.u_texture_window[2] = ZeroExtend32(m_draw_mode.texture_window.or_x);
|
||||
m_batch_ubo_data.u_texture_window[3] = ZeroExtend32(m_draw_mode.texture_window.or_y);
|
||||
|
||||
m_texture_window_active = ((m_draw_mode.texture_window.and_x & m_draw_mode.texture_window.and_y) != 0xFF ||
|
||||
((m_draw_mode.texture_window.or_x | m_draw_mode.texture_window.or_y) != 0));
|
||||
m_batch_ubo_data.u_texture_window_bits = m_draw_mode.texture_window;
|
||||
m_texture_window_active = (m_draw_mode.texture_window != GPUTextureWindow{0xFF, 0xFF, 0x00, 0x00});
|
||||
GSVector4i::store<true>(&m_batch_ubo_data.u_texture_window[0],
|
||||
GSVector4i::load32(&m_draw_mode.texture_window).u8to32());
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
|
@ -113,7 +113,7 @@ private:
|
||||
void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
|
||||
};
|
||||
|
||||
struct BatchConfig
|
||||
struct alignas(4) BatchConfig
|
||||
{
|
||||
BatchTextureMode texture_mode = BatchTextureMode::Disabled;
|
||||
GPUTransparencyMode transparency_mode = GPUTransparencyMode::Disabled;
|
||||
@ -130,7 +130,7 @@ private:
|
||||
BatchRenderMode GetRenderMode() const;
|
||||
};
|
||||
|
||||
struct BatchUBOData
|
||||
struct alignas(VECTOR_ALIGNMENT) BatchUBOData
|
||||
{
|
||||
u32 u_texture_window[4]; // and_x, and_y, or_x, or_y
|
||||
float u_src_alpha_factor;
|
||||
@ -140,7 +140,7 @@ private:
|
||||
float u_resolution_scale;
|
||||
float u_rcp_resolution_scale;
|
||||
float u_resolution_scale_minus_one;
|
||||
u32 pad;
|
||||
GPUTextureWindow u_texture_window_bits; // not actually used on GPU
|
||||
};
|
||||
|
||||
struct RendererStats
|
||||
@ -302,10 +302,10 @@ private:
|
||||
|
||||
u8 m_texpage_dirty = 0;
|
||||
|
||||
bool m_batch_ubo_dirty = true;
|
||||
BatchConfig m_batch;
|
||||
|
||||
// Changed state
|
||||
bool m_batch_ubo_dirty = true;
|
||||
BatchUBOData m_batch_ubo_data = {};
|
||||
|
||||
// Bounding box of VRAM area that the GPU has drawn into.
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "common/gsvector.h"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
enum : u32
|
||||
{
|
||||
@ -229,6 +230,16 @@ struct GPUTextureWindow
|
||||
u8 and_y;
|
||||
u8 or_x;
|
||||
u8 or_y;
|
||||
|
||||
// Equality: true when this object and rhs are byte-for-byte identical
// (std::memcmp over sizeof(*this) bytes).
// NOTE(review): byte-wise comparison assumes the struct has no padding; the
// members visible here are all u8 -- confirm against the full definition.
// NOTE(review): std::memcmp is declared in <cstring>; confirm it is included.
ALWAYS_INLINE bool operator==(const GPUTextureWindow& rhs) const
|
||||
{
|
||||
return (std::memcmp(this, &rhs, sizeof(*this)) == 0);
|
||||
}
|
||||
|
||||
// Inequality: true when any byte of this object differs from rhs
// (std::memcmp over sizeof(*this) bytes returns non-zero).
// NOTE(review): byte-wise comparison assumes the struct has no padding; the
// members visible here are all u8 -- confirm against the full definition.
ALWAYS_INLINE bool operator!=(const GPUTextureWindow& rhs) const
|
||||
{
|
||||
return (std::memcmp(this, &rhs, sizeof(*this)) != 0);
|
||||
}
|
||||
};
|
||||
|
||||
ALWAYS_INLINE static constexpr u32 VRAMPageIndex(u32 px, u32 py)
|
||||
|
Loading…
Reference in New Issue
Block a user