From fec210b1e185616b7799e9c1b29328162a33f3db Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 1 May 2024 14:11:20 +1000 Subject: [PATCH] GPU: Ensure coordinates are masked/clamped --- src/core/gpu.cpp | 2 +- src/core/gpu.h | 14 ++++---- src/core/gpu_backend.h | 2 +- src/core/gpu_hw.cpp | 64 ++++++++++++++++++++++++++----------- src/core/gpu_hw.h | 3 ++ src/core/gpu_sw_backend.cpp | 15 +++++---- src/core/gpu_types.h | 17 +++++++++- 7 files changed, 81 insertions(+), 36 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index b4dd0751d..fb3a117b9 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -196,7 +196,7 @@ void GPU::SoftReset() m_GPUSTAT.vertical_interlace = false; m_GPUSTAT.display_disable = true; m_GPUSTAT.dma_direction = DMADirection::Off; - m_drawing_area.Set(0, 0, 0, 0); + m_drawing_area = {}; m_drawing_area_changed = true; m_drawing_offset = {}; std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs)); diff --git a/src/core/gpu.h b/src/core/gpu.h index fd2899678..6dffc0383 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -294,7 +294,10 @@ protected: } /// Returns true if the drawing area is valid (i.e. left <= right, top <= bottom). - ALWAYS_INLINE bool IsDrawingAreaIsValid() const { return m_drawing_area.Valid(); } + ALWAYS_INLINE bool IsDrawingAreaIsValid() const + { + return (m_drawing_area.left <= m_drawing_area.right && m_drawing_area.top <= m_drawing_area.bottom); + } /// Clamps the specified coordinates to the drawing area. ALWAYS_INLINE void ClampCoordinatesToDrawingArea(s32* x, s32* y) @@ -457,13 +460,8 @@ protected: ALWAYS_INLINE void ClearTextureWindowChangedFlag() { texture_window_changed = false; } } m_draw_mode = {}; - Common::Rectangle m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT}; - - struct DrawingOffset - { - s32 x; - s32 y; - } m_drawing_offset = {}; + GPUDrawingArea m_drawing_area = {}; + GPUDrawingOffset m_drawing_offset = {}; bool m_console_is_pal = false; bool m_set_texture_disable_mask = false; diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index 1fc59c5db..c764b4379 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -63,7 +63,7 @@ protected: void HandleCommand(const GPUBackendCommand* cmd); - Common::Rectangle m_drawing_area{}; + GPUDrawingArea m_drawing_area = {}; Threading::KernelSemaphore m_sync_semaphore; std::atomic_bool m_gpu_thread_sleeping{false}; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index c96f662f6..f1eb9ffcd 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -77,7 +77,7 @@ ALWAYS_INLINE static bool IsBlendedTextureFiltering(GPUTextureFilter filter) } /// Computes the area affected by a VRAM transfer, including wrap-around of X. -static Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) +ALWAYS_INLINE_RELEASE static Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) { Common::Rectangle out_rc = Common::Rectangle::FromExtents(x % VRAM_WIDTH, y % VRAM_HEIGHT, width, height); if (out_rc.right > VRAM_WIDTH) @@ -244,6 +244,7 @@ void GPU_HW::Reset(bool clear_vram) m_batch_ubo_data = {}; m_batch_ubo_dirty = true; m_current_depth = 1; + SetClampedDrawingArea(); if (clear_vram) ClearFramebuffer(); @@ -500,6 +501,20 @@ void GPU_HW::CheckSettings() } } +void GPU_HW::SetClampedDrawingArea() +{ + if (!IsDrawingAreaIsValid()) [[unlikely]] + { + m_clamped_drawing_area = {}; + return; + } + + m_clamped_drawing_area.right = std::min(m_drawing_area.right + 1, static_cast(VRAM_WIDTH)); + m_clamped_drawing_area.left = std::min(m_drawing_area.left, std::min(m_clamped_drawing_area.right, VRAM_WIDTH - 1)); + m_clamped_drawing_area.bottom = std::min(m_drawing_area.bottom + 1, static_cast(VRAM_HEIGHT)); + m_clamped_drawing_area.top = std::min(m_drawing_area.top, std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1)); +} + u32 GPU_HW::CalculateResolutionScale() const { const u32 max_resolution_scale = GetMaxResolutionScale(); @@ -592,6 +607,26 @@ void GPU_HW::ClearVRAMDirtyRectangle() m_vram_dirty_write_rect.SetInvalid(); } +void GPU_HW::IncludeDrawnDirtyRectangle(s32 min_x, s32 min_y, s32 max_x, s32 max_y) +{ + const u32 clamped_min_x = std::clamp(min_x, static_cast(m_clamped_drawing_area.left), + static_cast(m_clamped_drawing_area.right - 1)); + const u32 clamped_max_x = + std::clamp(max_x, static_cast(m_clamped_drawing_area.left), static_cast(m_clamped_drawing_area.right)); + m_vram_dirty_draw_rect.left = std::min(m_vram_dirty_draw_rect.left, clamped_min_x); + m_vram_dirty_draw_rect.right = std::max(m_vram_dirty_draw_rect.right, clamped_max_x); + + const u32 clamped_min_y = std::clamp(min_y, static_cast(m_clamped_drawing_area.top), + static_cast(m_clamped_drawing_area.bottom - 1)); + const u32 clamped_max_y = + std::clamp(max_y, static_cast(m_clamped_drawing_area.top), static_cast(m_clamped_drawing_area.bottom)); + m_vram_dirty_draw_rect.top = std::min(m_vram_dirty_draw_rect.top, clamped_min_y); + m_vram_dirty_draw_rect.bottom = std::max(m_vram_dirty_draw_rect.bottom, clamped_max_y); + + DebugAssert(m_vram_dirty_draw_rect.left < VRAM_WIDTH && m_vram_dirty_draw_rect.right <= VRAM_WIDTH); + DebugAssert(m_vram_dirty_draw_rect.top < VRAM_HEIGHT && m_vram_dirty_draw_rect.bottom <= VRAM_HEIGHT); +} + std::tuple GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) { const u32 scale = scaled ? m_resolution_scale : 1u; @@ -2004,13 +2039,9 @@ void GPU_HW::LoadVertices() } else { - const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + // TODO: Cull triangles that fall entirely off-screen. + IncludeDrawnDirtyRectangle(min_x, min_y, max_x, max_y); - m_vram_dirty_draw_rect.Include(clip_left, clip_right, clip_top, clip_bottom); AddDrawTriangleTicks(native_vertex_positions[0][0], native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1], native_vertex_positions[2][0], native_vertex_positions[2][1], rc.shading_enable, @@ -2041,14 +2072,8 @@ void GPU_HW::LoadVertices() } else { - const u32 clip_left = static_cast(std::clamp(min_x_123, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = - static_cast(std::clamp(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y_123, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + IncludeDrawnDirtyRectangle(min_x_123, min_y_123, max_x_123, max_y_123); - m_vram_dirty_draw_rect.Include(clip_left, clip_right, clip_top, clip_bottom); AddDrawTriangleTicks(native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0], native_vertex_positions[1][1], native_vertex_positions[3][0], native_vertex_positions[3][1], rc.shading_enable, @@ -2187,14 +2212,14 @@ void GPU_HW::LoadVertices() tex_top = 0; } + IncludeDrawnDirtyRectangle(pos_x, pos_y, pos_x + rectangle_width, pos_y + rectangle_height); + const u32 clip_left = static_cast(std::clamp(pos_x, m_drawing_area.left, m_drawing_area.right)); const u32 clip_right = static_cast(std::clamp(pos_x + rectangle_width, m_drawing_area.left, m_drawing_area.right)) + 1u; const u32 clip_top = static_cast(std::clamp(pos_y, m_drawing_area.top, m_drawing_area.bottom)); const u32 clip_bottom = static_cast(std::clamp(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - m_vram_dirty_draw_rect.Include(clip_left, clip_right, clip_top, clip_bottom); AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable); if (m_sw_renderer) @@ -2251,13 +2276,14 @@ void GPU_HW::LoadVertices() return; } + IncludeDrawnDirtyRectangle(min_x, min_y, max_x, max_y); + const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.right)); const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - m_vram_dirty_draw_rect.Include(clip_left, clip_right, clip_top, clip_bottom); AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. @@ -2317,6 +2343,8 @@ void GPU_HW::LoadVertices() } else { + IncludeDrawnDirtyRectangle(min_x, min_y, max_x, max_y); + const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.right)); const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; @@ -2324,7 +2352,6 @@ void GPU_HW::LoadVertices() const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - m_vram_dirty_draw_rect.Include(clip_left, clip_right, clip_top, clip_bottom); AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); // TODO: Should we do a PGXP lookup here? Most lines are 2D. @@ -3087,6 +3114,7 @@ void GPU_HW::DispatchRenderCommand() if (m_drawing_area_changed) { m_drawing_area_changed = false; + SetClampedDrawingArea(); SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 7b71aa25a..e2d899fcf 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -131,6 +131,7 @@ private: void PrintSettingsToLog(); void CheckSettings(); + void SetClampedDrawingArea(); void UpdateVRAMReadTexture(bool drawn, bool written); void UpdateDepthBufferFromMaskBit(); void ClearDepthBuffer(); @@ -149,6 +150,7 @@ private: void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); void IncludeVRAMDirtyRectangle(Common::Rectangle& rect, const Common::Rectangle& new_rect); + void IncludeDrawnDirtyRectangle(s32 min_x, s32 min_y, s32 max_x, s32 max_y); void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v); bool IsFlushed() const; @@ -252,6 +254,7 @@ private: BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. + GPUDrawingArea m_clamped_drawing_area = {}; Common::Rectangle m_vram_dirty_draw_rect; Common::Rectangle m_vram_dirty_write_rect; Common::Rectangle m_current_uv_range; diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index 6eb760bca..648291752 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -214,6 +214,7 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman if ((bg_color.bits & mask_and) != 0) return; + DebugAssert(static_cast(x) < VRAM_WIDTH && static_cast(y) < VRAM_HEIGHT); SetPixel(static_cast(x), static_cast(y), color.bits | cmd->params.GetMaskOR()); } @@ -234,6 +235,7 @@ void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) continue; } + const u32 draw_y = static_cast(y) & VRAM_HEIGHT_MASK; const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); for (u32 offset_x = 0; offset_x < cmd->width; offset_x++) @@ -244,8 +246,8 @@ void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); - ShadePixel( - cmd, static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); + ShadePixel(cmd, static_cast(x), draw_y, r, g, + b, texcoord_x, texcoord_y); } } } @@ -569,7 +571,7 @@ void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, continue; DrawSpan( - cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + cmd, y & VRAM_HEIGHT_MASK, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); } } else @@ -583,9 +585,8 @@ void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, if (y >= static_cast(m_drawing_area.top)) { - DrawSpan( - cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + cmd, y & VRAM_HEIGHT_MASK, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); } yi++; @@ -698,8 +699,8 @@ void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBac const u8 g = shading_enable ? static_cast(cur_point.g >> Line_RGB_FractBits) : p0->g; const u8 b = shading_enable ? static_cast(cur_point.b >> Line_RGB_FractBits) : p0->b; - ShadePixel(cmd, static_cast(x), static_cast(y), r, - g, b, 0, 0); + ShadePixel( + cmd, static_cast(x), static_cast(y) & VRAM_HEIGHT_MASK, r, g, b, 0, 0); } cur_point.x += step.dx_dk; diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 44eeb266d..93951c6df 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -83,6 +83,21 @@ enum class GPUInterlacedDisplayMode : u8 SeparateFields }; +// NOTE: Inclusive, not exclusive on the upper bounds. +struct GPUDrawingArea +{ + u32 left; + u32 top; + u32 right; + u32 bottom; +}; + +struct GPUDrawingOffset +{ + s32 x; + s32 y; +}; + union GPURenderCommand { u32 bits; @@ -318,7 +333,7 @@ struct GPUBackendCopyVRAMCommand : public GPUBackendCommand struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand { - Common::Rectangle new_area; + GPUDrawingArea new_area; }; struct GPUBackendDrawCommand : public GPUBackendCommand