From bb3d27f153e3bd2000d76f980c09d56f9f62fdac Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 23 Sep 2024 00:57:31 +1000 Subject: [PATCH] GPU/SW: Drop dithering function specialization Reduces the code size by roughly half, which may be beneficial on icache-starved processors. Also fixes too-bright values being fed into the blending equation. --- src/core/gpu_sw.cpp | 1 + src/core/gpu_sw_backend.cpp | 7 +- src/core/gpu_sw_rasterizer.h | 14 ++-- src/core/gpu_sw_rasterizer.inl | 142 +++++++++++++-------------------- src/core/gpu_types.h | 2 - 5 files changed, 67 insertions(+), 99 deletions(-) diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 6dd0493c4..c8119c3a2 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -491,6 +491,7 @@ void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) co FillBackendCommandParameters(cmd); cmd->rc.bits = rc.bits; cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; + cmd->draw_mode.dither_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; cmd->palette.bits = m_draw_mode.palette_reg.bits; cmd->window = m_draw_mode.texture_window; } diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index 4a5cd5297..6168f0d39 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -29,10 +29,9 @@ void GPU_SW_Backend::Reset() void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) { const GPURenderCommand rc{cmd->rc.bits}; - const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; const GPU_SW_Rasterizer::DrawTriangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); DrawFunction(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); if (rc.quad_polygon) @@ -51,8 +50,8 @@ void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) { - const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = GPU_SW_Rasterizer::GetDrawLineFunction( - cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); + const GPU_SW_Rasterizer::DrawLineFunction DrawFunction = + GPU_SW_Rasterizer::GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable); for (u16 i = 1; i < cmd->num_vertices; i++) DrawFunction(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); diff --git a/src/core/gpu_sw_rasterizer.h b/src/core/gpu_sw_rasterizer.h index d6f3adace..a3eebe062 100644 --- a/src/core/gpu_sw_rasterizer.h +++ b/src/core/gpu_sw_rasterizer.h @@ -28,11 +28,11 @@ using DrawTriangleFunction = void (*)(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); -typedef const DrawTriangleFunction DrawTriangleFunctionTable[2][2][2][2][2]; +typedef const DrawTriangleFunction DrawTriangleFunctionTable[2][2][2][2]; using DrawLineFunction = void (*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1); -typedef const DrawLineFunction DrawLineFunctionTable[2][2][2]; +typedef const DrawLineFunction DrawLineFunctionTable[2][2]; // Default implementation, compatible with all ISAs. extern const DrawRectangleFunctionTable DrawRectangleFunctions; @@ -46,10 +46,9 @@ extern const DrawLineFunctionTable* SelectedDrawLineFunctions; extern void SelectImplementation(); -ALWAYS_INLINE static DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, - bool dithering_enable) +ALWAYS_INLINE static DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable) { - return (*SelectedDrawLineFunctions)[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)]; + return (*SelectedDrawLineFunctions)[u8(shading_enable)][u8(transparency_enable)]; } ALWAYS_INLINE static DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, @@ -59,11 +58,10 @@ ALWAYS_INLINE static DrawRectangleFunction GetDrawRectangleFunction(bool texture } ALWAYS_INLINE static DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, - bool raw_texture_enable, bool transparency_enable, - bool dithering_enable) + bool raw_texture_enable, bool transparency_enable) { return (*SelectedDrawTriangleFunctions)[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)] - [u8(transparency_enable)][u8(dithering_enable)]; + [u8(transparency_enable)]; } #define DECLARE_ALTERNATIVE_RASTERIZER(isa) \ diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index f1a8930cc..b49d344f7 100644 --- a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -20,7 +20,8 @@ namespace GPU_SW_Rasterizer { // TODO: UpdateVRAM, FillVRAM, etc. #ifdef USE_VECTOR -#if 0 +// #define CHECK_VECTOR +#ifdef CHECK_VECTOR static u16 s_vram_backup[VRAM_WIDTH * VRAM_HEIGHT]; static u16 s_new_vram[VRAM_WIDTH * VRAM_HEIGHT]; static u32 s_bad_counter = 0; @@ -48,9 +49,6 @@ static u32 s_bad_counter = 0; } \ /*Assert(std::memcmp(g_vram, s_new_vram, sizeof(g_vram)) == 0)*/ \ } while (0) -#else -#define BACKUP_VRAM() -#define CHECK_VRAM(drawer) #endif #endif @@ -77,7 +75,7 @@ static u32 s_bad_counter = 0; return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16)); } -template +template [[maybe_unused]] ALWAYS_INLINE_RELEASE static void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) @@ -129,6 +127,7 @@ templatedraw_mode.dither_enable; const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; @@ -142,6 +141,7 @@ templatedraw_mode.dither_enable; const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; @@ -251,8 +251,8 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); - ShadePixel(cmd, static_cast(x), draw_y, r, g, - b, texcoord_x, texcoord_y); + ShadePixel(cmd, static_cast(x), draw_y, r, g, b, + texcoord_x, texcoord_y); } } } @@ -502,7 +502,7 @@ struct PixelVectors }; } // namespace -template +template ALWAYS_INLINE_RELEASE static void ShadePixel(const PixelVectors& pv, GPUTextureMode texture_mode, GPUTransparencyMode transparency_mode, u32 start_x, u32 y, GSVectorNi vertex_color_rg, GSVectorNi vertex_color_ba, GSVectorNi texcoord_x, @@ -578,16 +578,8 @@ ShadePixel(const PixelVectors& pv, GPUTextureMode texture_mode, GSVectorNi ba = tba.mul16l(vertex_color_ba); // Convert to 5bit. - if constexpr (dithering_enable) - { - rg = rg.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); - ba = ba.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); - } - else - { - rg = rg.sra16<7>(); - ba = ba.sra16<7>(); - } + rg = rg.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); + ba = ba.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); // Bit15 gets passed through as-is. ba = ba.blend16<0xaa>(tba); @@ -604,25 +596,15 @@ ShadePixel(const PixelVectors& pv, GPUTextureMode texture_mode, else { // Non-textured transparent polygons don't set bit 15, but are treated as transparent. - if constexpr (dithering_enable) - { - GSVectorNi rg = vertex_color_rg.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); - GSVectorNi ba = vertex_color_ba.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); + GSVectorNi rg = vertex_color_rg.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); + GSVectorNi ba = vertex_color_ba.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); - // Clamp to 5bit. We use 32bit for BA to set a to zero. - rg = rg.min_u16(GSVectorNi::cxpr16(0x1F)); - ba = ba.min_u16(GSVectorNi::cxpr(0x1F)); + // Clamp to 5bit. We use 32bit for BA to set a to zero. + rg = rg.min_u16(GSVectorNi::cxpr16(0x1F)); + ba = ba.min_u16(GSVectorNi::cxpr(0x1F)); - // And interleave back to 16bpp. - color = RG_BAToRGB5A1(rg, ba); - } - else - { - // Note that bit15 is set to 0 here, which the shift will do. - const GSVectorNi rg = vertex_color_rg.srl16<3>(); - const GSVectorNi ba = vertex_color_ba.srl16<3>(); - color = RG_BAToRGB5A1(rg, ba); - } + // And interleave back to 16bpp. + color = RG_BAToRGB5A1(rg, ba); } GSVectorNi bg_color = LoadVector(start_x, y); @@ -725,7 +707,9 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) const PixelVectors pv(cmd); const u32 width = cmd->width; +#ifdef CHECK_VECTOR BACKUP_VRAM(); +#endif for (u32 offset_y = 0; offset_y < cmd->height; offset_y++) { @@ -751,7 +735,7 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) preserve_mask = preserve_mask | xvec.gt32(pv.clip_right); if (!preserve_mask.alltrue()) { - ShadePixel( + ShadePixel( pv, cmd->draw_mode.texture_mode, cmd->draw_mode.transparency_mode, x, draw_y, rg, ba, row_texcoord_x, texcoord_y, preserve_mask, GSVectorNi::zero()); } @@ -768,13 +752,15 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) texcoord_y = texcoord_y.add32(GSVectorNi::cxpr(1)) & GSVectorNi::cxpr(0xFF); } +#ifdef CHECK_VECTOR CHECK_VRAM(GPU_SW_Rasterizer::DrawRectangleFunctions[texture_enable][raw_texture_enable][transparency_enable](cmd)); +#endif } #endif // USE_VECTOR // TODO: Vectorize line draw. -template +template static void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1) { @@ -837,8 +823,8 @@ static void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawL const u8 g = shading_enable ? unfp_rgb(curg) : p0->g; const u8 b = shading_enable ? unfp_rgb(curb) : p0->b; - ShadePixel( - cmd, static_cast(x), static_cast(y) & VRAM_HEIGHT_MASK, r, g, b, 0, 0); + ShadePixel(cmd, static_cast(x), static_cast(y) & VRAM_HEIGHT_MASK, r, + g, b, 0, 0); } curx += dxdk; @@ -979,8 +965,7 @@ struct TrianglePart #ifndef USE_VECTOR -template +template static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep) { @@ -1009,7 +994,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start do { - ShadePixel( + ShadePixel( cmd, static_cast(current_x), static_cast(y), rgb.GetR(), rgb.GetG(), rgb.GetB(), uv.GetU(), uv.GetV()); current_x++; @@ -1020,8 +1005,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start } while (--width > 0); } -template +template ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCommand* cmd, const TrianglePart& tp, const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb, const RGBSteps& rgbstep) @@ -1070,7 +1054,7 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo continue; } - DrawSpan( + DrawSpan( cmd, y & VRAM_HEIGHT_MASK, unfp_xy(left_x), unfp_xy(right_x), luv, uvstep, lrgb, rgbstep); } while (current_y > end_y); } @@ -1098,7 +1082,7 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo if (y >= static_cast(g_drawing_area.top) && (!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (static_cast(current_y) & 1u))) { - DrawSpan( + DrawSpan( cmd, y & VRAM_HEIGHT_MASK, unfp_xy(left_x), unfp_xy(right_x), luv, uvstep, lrgb, rgbstep); } @@ -1158,8 +1142,7 @@ struct TriangleVectors : PixelVectors }; } // namespace -template +template static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep, const TriangleVectors& tv) @@ -1210,8 +1193,10 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start dv = GSVectorNi::zero(); } - const GSVectorNi dither = GSVectorNi::broadcast128( - &VECTOR_DITHER_MATRIX[static_cast(y) & 3][(static_cast(current_x) & 3) * 2]); + const GSVectorNi dither = cmd->draw_mode.dither_enable ? + GSVectorNi::broadcast128( + &VECTOR_DITHER_MATRIX[static_cast(y) & 3][(static_cast(current_x) & 3) * 2]) : + GSVectorNi::zero(); GSVectorNi xvec = GSVectorNi(current_x).add32(SPAN_OFFSET_VEC); GSVectorNi wvec = GSVectorNi(width).sub32(SPAN_WIDTH_VEC); @@ -1237,7 +1222,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start preserve_mask = preserve_mask | xvec.gt32(tv.clip_right); if (!preserve_mask.alltrue()) { - ShadePixel( + ShadePixel( tv, cmd->draw_mode.texture_mode, cmd->draw_mode.transparency_mode, static_cast(current_x), static_cast(y), rg, b, u, v, preserve_mask, dither); } @@ -1262,8 +1247,7 @@ static void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start } } -template +template ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCommand* cmd, const TrianglePart& tp, const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb, const RGBSteps& rgbstep) @@ -1314,7 +1298,7 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo continue; } - DrawSpan( + DrawSpan( cmd, y & VRAM_HEIGHT_MASK, unfp_xy(left_x), unfp_xy(right_x), luv, uvstep, lrgb, rgbstep, tv); } while (current_y > end_y); } @@ -1344,7 +1328,7 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo if (y >= static_cast(g_drawing_area.top) && (!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (static_cast(current_y) & 1u))) { - DrawSpan( + DrawSpan( cmd, y & VRAM_HEIGHT_MASK, unfp_xy(left_x), unfp_xy(right_x), luv, uvstep, lrgb, rgbstep, tv); } @@ -1362,12 +1346,11 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawPolygonCo #endif // USE_VECTOR -template +template static void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2) { -#if 0 +#ifdef CHECK_VECTOR const GPUBackendDrawPolygonCommand::Vertex* orig_v0 = v0; const GPUBackendDrawPolygonCommand::Vertex* orig_v1 = v1; const GPUBackendDrawPolygonCommand::Vertex* orig_v2 = v2; @@ -1500,20 +1483,20 @@ static void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBacke rgb.Init(top_left_vertex->r, top_left_vertex->g, top_left_vertex->b); } -#ifdef USE_VECTOR +#ifdef CHECK_VECTOR BACKUP_VRAM(); #endif for (u32 i = 0; i < 2; i++) { - DrawTrianglePart( - cmd, triparts[i], uv, uvstep, rgb, rgbstep); + DrawTrianglePart(cmd, triparts[i], uv, + uvstep, rgb, rgbstep); } -#ifdef USE_VECTOR +#ifdef CHECK_VECTOR CHECK_VRAM( - GPU_SW_Rasterizer::DrawTriangleFunctions[shading_enable][texture_enable][raw_texture_enable][transparency_enable] - [dithering_enable](cmd, orig_v0, orig_v1, orig_v2)); + GPU_SW_Rasterizer::DrawTriangleFunctions[shading_enable][texture_enable][raw_texture_enable][transparency_enable]( + cmd, orig_v0, orig_v1, orig_v2)); #endif } @@ -1523,29 +1506,18 @@ constinit const DrawRectangleFunctionTable DrawRectangleFunctions = { {{&DrawRectangle, &DrawRectangle}, {&DrawRectangle, &DrawRectangle}}}; -constinit const DrawLineFunctionTable DrawLineFunctions = { - {{&DrawLine, &DrawLine}, - {&DrawLine, &DrawLine}}, - {{&DrawLine, &DrawLine}, - {&DrawLine, &DrawLine}}}; +constinit const DrawLineFunctionTable DrawLineFunctions = {{&DrawLine, &DrawLine}, + {&DrawLine, &DrawLine}}; constinit const DrawTriangleFunctionTable DrawTriangleFunctions = { - {{{{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}, - {{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}}, - {{{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}, - {{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}}}, - {{{{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}, - {{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}}, - {{{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}, - {{&DrawTriangle, &DrawTriangle}, - {&DrawTriangle, &DrawTriangle}}}}}; + {{{&DrawTriangle, &DrawTriangle}, + {&DrawTriangle, &DrawTriangle}}, + {{&DrawTriangle, &DrawTriangle}, + {&DrawTriangle, &DrawTriangle}}}, + {{{&DrawTriangle, &DrawTriangle}, + {&DrawTriangle, &DrawTriangle}}, + {{&DrawTriangle, &DrawTriangle}, + {&DrawTriangle, &DrawTriangle}}}}; #ifdef __INTELLISENSE__ } diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index a6483a079..99fa8932f 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -348,8 +348,6 @@ struct GPUBackendDrawCommand : public GPUBackendCommand GPURenderCommand rc; GPUTexturePaletteReg palette; GPUTextureWindow window; - - ALWAYS_INLINE bool IsDitheringEnabled() const { return rc.IsDitheringEnabled() && draw_mode.dither_enable; } }; struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand