Refactor UV offset and limit code to be shared by all renderers.

- Calculate and apply offset for quads with flipped UVs earlier.
- Reduce offsets applied in software renderer as resolution scaling increases (fixes line artefacts).
- Calculate UV limits used for filtering earlier and pass these to the individual renderers.
This commit is contained in:
iCatButler 2018-11-30 12:11:39 +00:00
parent a081f34a52
commit 1e0e774ad2
11 changed files with 310 additions and 296 deletions

View File

@ -148,6 +148,11 @@ struct PS_GPU
uint32 InQuad_clut;
bool InQuad_invalidW;
// primitive UV offsets (used to correct flipped sprites)
uint16_t off_u, off_v;
// primitive UV limits (used to clamp texture sampling)
uint16_t min_u, min_v, max_u, max_v;
line_point InPLine_PrevPoint;
uint32 FBRW_X;

View File

@ -1,4 +1,5 @@
#include <math.h>
#include <algorithm>
#include "libretro_cbs.h"
#define COORD_FBS 12
@ -173,7 +174,7 @@ static INLINE void DrawSpan(PS_GPU *gpu, int y, const int32 x_start, const int32
if(textured)
{
uint16 fbw = GetTexel<TexMode_TA>(gpu, ig.u >> (COORD_FBS + COORD_POST_PADDING), ig.v >> (COORD_FBS + COORD_POST_PADDING));
uint16 fbw = GetTexel<TexMode_TA>(gpu, ig.u >> (COORD_FBS + COORD_POST_PADDING), ig.v >> (COORD_FBS + COORD_POST_PADDING));
if(fbw)
{
@ -290,8 +291,8 @@ static INLINE void DrawTriangle(PS_GPU *gpu, tri_vertex *vertices)
if(textured)
{
ig.u = (COORD_MF_INT(vertices[core_vertex].u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
ig.v = (COORD_MF_INT(vertices[core_vertex].v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
ig.u = (COORD_MF_INT(vertices[core_vertex].u) + (1 << (COORD_FBS - 1 - gpu->upscale_shift))) << COORD_POST_PADDING;
ig.v = (COORD_MF_INT(vertices[core_vertex].v) + (1 << (COORD_FBS - 1 - gpu->upscale_shift))) << COORD_POST_PADDING;
if (gpu->upscale_shift > 0)
{
@ -300,10 +301,10 @@ static INLINE void DrawTriangle(PS_GPU *gpu, tri_vertex *vertices)
// triangles. Otherwise this could cause a small "shift" in
// the texture coordinates when upscaling.
if (idl.du_dy == 0 && (int32_t)idl.du_dx > 0)
ig.u -= (1 << (COORD_FBS - 1 - gpu->upscale_shift));
if (idl.dv_dx == 0 && (int32_t)idl.dv_dy > 0)
ig.v -= (1 << (COORD_FBS - 1 - gpu->upscale_shift));
if(gpu->off_u)
ig.u += (COORD_MF_INT(1) - (1 << (COORD_FBS - gpu->upscale_shift))) << COORD_POST_PADDING;
if (gpu->off_v)
ig.v += (COORD_MF_INT(1) - (1 << (COORD_FBS - gpu->upscale_shift))) << COORD_POST_PADDING;
}
}
@ -491,6 +492,189 @@ if(vertices[1].y == vertices[0].y)
#endif
}
// Work out whether this primitive's UVs must be nudged by one texel to
// mimic the PS1's top-left interpolation under nearest-neighbour sampling.
// iCB: Just borrowing this from \parallel-psx\renderer\renderer.cpp
void Calc_UVOffsets(PS_GPU *gpu, tri_vertex *vertices, unsigned count)
{
   uint16 u_fix = 0;
   uint16 v_fix = 0;

   // For X/Y flipped 2D sprites, PSX games rely on a very specific
   // rasterization behavior: the PSX samples its interpolant essentially at
   // the top-left corner of each pixel and splats it across the whole pixel,
   // while modern GPUs interpolate across the full pixel. Shifting vertex
   // coords by 0.5 would fix this at native resolution but breaks when
   // upscaling (several samples per native sample need NN rules to hit the
   // same UV every time), and interpolate-at-offset tricks make vertices
   // sharing an edge see different UVs, sampling outside the intended
   // boundary. The only safe fixup is therefore for "sprites" (quads) which
   // should not share edges.
   if (count == 4)
   {
      // Edge vectors of the quad's first triangle. More direct checks might
      // be faster, but this handles primitives in any order and orientation
      // and is far more SIMD-friendly if needed.
      float abx = vertices[1].precise[0] - vertices[0].precise[0];
      float aby = vertices[1].precise[1] - vertices[0].precise[1];
      float bcx = vertices[2].precise[0] - vertices[1].precise[0];
      float bcy = vertices[2].precise[1] - vertices[1].precise[1];
      float cax = vertices[0].precise[0] - vertices[2].precise[0];
      float cay = vertices[0].precise[1] - vertices[2].precise[1];

      // Static plane derivatives; W is assumed uniform across the primitive
      // and the plane equation constant across the quad.
      float dudx = -aby * float(vertices[2].u) - bcy * float(vertices[0].u) - cay * float(vertices[1].u);
      float dvdx = -aby * float(vertices[2].v) - bcy * float(vertices[0].v) - cay * float(vertices[1].v);
      float dudy = +abx * float(vertices[2].u) + bcx * float(vertices[0].u) + cax * float(vertices[1].u);
      float dvdy = +abx * float(vertices[2].v) + bcx * float(vertices[0].v) + cax * float(vertices[1].v);
      float area = bcx * cay - bcy * cax;

      // iCB: Detect and reject any triangles with 0 size texture area
      float texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) - (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);

      // Degenerate primitives would be culled anyway, so skip them here too.
      if ((area != 0.0f) && (texArea != 0.0f))
      {
         float inv_area = 1.0f / area;

         dudx *= inv_area;
         dudy *= inv_area;
         dvdx *= inv_area;
         dvdy *= inv_area;

         // If U or V decreases along one axis while staying constant along
         // the other, bump it by one texel so nearest-neighbour sampling
         // lands where the PSX would have sampled. Without 1:1 pixel
         // correspondence this shifts the sprite slightly, but we are
         // guaranteed not to sample garbage. Rare legitimate 3D meshes can
         // hit this and pop a single texel, but that beats borked 2D
         // overall.
         // TODO: Try to figure out if this can be generalized.
         // TODO: If perf becomes an issue, SIMD the comparisons into an
         //       8-bit code and use a LUT to get the offsets.
         if ((dudx < 0.0f && dudy == 0.0f) || (dudy < 0.0f && dudx == 0.0f))
            u_fix = 1;
         if ((dvdx < 0.0f && dvdy == 0.0f) || (dvdy < 0.0f && dvdx == 0.0f))
            v_fix = 1;
      }
   }

   gpu->off_u = u_fix;
   gpu->off_v = v_fix;
}
// Initialise the per-primitive UV bounds to an empty range (min at the
// maximum value, max at zero) so any vertex passed to Extend_UVLimits()
// will extend them.
void Reset_UVLimits(PS_GPU *gpu)
{
   gpu->max_u = 0;
   gpu->max_v = 0;
   gpu->min_u = UINT16_MAX;
   gpu->min_v = UINT16_MAX;
}
// Grow the primitive's UV bounding box to cover the given vertices.
// With a texture window active (mask != 0xff), clamping is disabled by
// forcing the limits to the full range instead.
void Extend_UVLimits(PS_GPU *gpu, tri_vertex *vertices, unsigned count)
{
   const bool no_window = (gpu->SUCV.TWX_AND == (uint8)0xffu) &&
                          (gpu->SUCV.TWY_AND == (uint8)0xffu);

   if (no_window)
   {
      // No texture window: the primitive likely touches only a small subset
      // of the texture, so track the exact texel rectangle sampled.
      for (unsigned i = 0; i < count; i++)
      {
         const uint16_t u = vertices[i].u;
         const uint16_t v = vertices[i].v;

         if (u < gpu->min_u) gpu->min_u = u;
         if (v < gpu->min_v) gpu->min_v = v;
         if (u > gpu->max_u) gpu->max_u = u;
         if (v > gpu->max_v) gpu->max_v = v;
      }
   }
   else
   {
      // texture window so don't clamp texture
      gpu->min_u = 0;
      gpu->min_v = 0;
      gpu->max_u = UINT16_MAX;
      gpu->max_v = UINT16_MAX;
   }
}
// Apply the flipped-sprite UV offsets to the primitive's UV limits and
// tighten them for nearest-neighbour sampling before the renderers use
// them for clamping.
void Finalise_UVLimits(PS_GPU *gpu)
{
   const bool no_window = (gpu->SUCV.TWX_AND == (uint8)0xffu) &&
                          (gpu->SUCV.TWY_AND == (uint8)0xffu);

   if (!no_window)
   {
      // texture window so don't clamp texture
      gpu->min_u = 0;
      gpu->min_v = 0;
      gpu->max_u = UINT16_MAX;
      gpu->max_v = UINT16_MAX;
      return;
   }

   // Shift the limits by the same offsets applied to the vertex UVs.
   uint16 lo_u = gpu->min_u + gpu->off_u;
   uint16 lo_v = gpu->min_v + gpu->off_v;
   uint16 hi_u = gpu->max_u + gpu->off_u;
   uint16 hi_v = gpu->max_v + gpu->off_v;

   // In nearest neighbor, we'll get *very* close to this UV, but not close
   // enough to actually sample it, so pull the upper bound in by one texel.
   // If du/dx or dv/dx are negative, we probably need to invert this though ...
   if (hi_u > lo_u)
      hi_u--;
   if (hi_v > lo_v)
      hi_v--;

   // If min and max sit in the same 256-texel page no wrapping can occur,
   // so prewrap and avoid the fallback.
   if ((hi_u & 0xff00) == (lo_u & 0xff00))
      hi_u &= 0xff;
   if ((hi_v & 0xff00) == (lo_v & 0xff00))
      hi_v &= 0xff;

   gpu->min_u = lo_u;
   gpu->min_v = lo_v;
   gpu->max_u = hi_u;
   gpu->max_v = hi_v;
}
// 0 = disabled
// 1 = enabled (default mode)
// 2 = enabled (aggressive mode)
@ -776,6 +960,10 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
for (unsigned v = 0; v < 3; v++)
vertices[v].precise[2] = 1.f;
// Calculated UV offsets (needed for hardware renderers and software with scaling)
// Do one time updates for primitive
if (textured && (gpu->InCmd != INCMD_QUAD))
Calc_UVOffsets(gpu, vertices, numvertices);
if(numvertices == 4)
{
@ -847,6 +1035,8 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
if (rsx_intf_is_type() == RSX_OPENGL || rsx_intf_is_type() == RSX_VULKAN)
{
Reset_UVLimits(gpu);
if (numvertices == 4)
{
if (gpu->InCmd == INCMD_NONE)
@ -854,6 +1044,10 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
// We have 4 quad vertices, we can push that at once
tri_vertex *first = &gpu->InQuad_F3Vertices[0];
Extend_UVLimits(gpu, first, 1);
Extend_UVLimits(gpu, vertices, 3);
Finalise_UVLimits(gpu);
rsx_intf_push_quad(first->precise[0],
first->precise[1],
first->precise[2],
@ -878,10 +1072,12 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
((uint32_t)vertices[2].r) |
((uint32_t)vertices[2].g << 8) |
((uint32_t)vertices[2].b << 16),
first->u, first->v,
vertices[0].u, vertices[0].v,
vertices[1].u, vertices[1].v,
vertices[2].u, vertices[2].v,
first->u + gpu->off_u, first->v + gpu->off_v,
vertices[0].u + gpu->off_u, vertices[0].v + gpu->off_v,
vertices[1].u + gpu->off_u, vertices[1].v + gpu->off_v,
vertices[2].u + gpu->off_u, vertices[2].v + gpu->off_v,
gpu->min_u, gpu->min_v,
gpu->max_u, gpu->max_v,
gpu->TexPageX, gpu->TexPageY,
clut_x, clut_y,
blend_mode,
@ -894,6 +1090,9 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
}
else
{
Extend_UVLimits(gpu, vertices, 3);
Finalise_UVLimits(gpu);
// Push a single triangle
rsx_intf_push_triangle(vertices[0].precise[0],
vertices[0].precise[1],
@ -916,6 +1115,8 @@ static void Command_DrawPolygon(PS_GPU *gpu, const uint32_t *cb)
vertices[0].u, vertices[0].v,
vertices[1].u, vertices[1].v,
vertices[2].u, vertices[2].v,
gpu->min_u, gpu->min_v,
gpu->max_u, gpu->max_v,
gpu->TexPageX, gpu->TexPageY,
clut_x, clut_y,
blend_mode,

View File

@ -36,8 +36,8 @@ static void DrawSprite(PS_GPU *gpu, int32_t x_arg, int32_t y_arg, int32_t w, int
//else
// u = (u + 1) & ~1;
if(FlipY)
v_inc = -1;
if(FlipY)
v_inc = -1;
}
if(x_start < gpu->ClipX0)
@ -212,6 +212,10 @@ static void Command_DrawSprite(PS_GPU *gpu, const uint32_t *cb)
v + h, /* t2y */
u + w, /* t5x */
v + h, /* t5y */
u,
v,
u + w - 1, // clamp UVs 1 pixel from edge (sampling should not quite reach it)
v + h - 1,
gpu->TexPageX,
gpu->TexPageY,
clut_x,

View File

@ -770,12 +770,7 @@ float Renderer::allocate_depth(const Rect &rect)
void Renderer::build_attribs(BufferVertex *output, const Vertex *vertices, unsigned count)
{
unsigned min_u = UINT16_MAX;
unsigned max_u = 0;
unsigned min_v = UINT16_MAX;
unsigned max_v = 0;
unsigned texture_limits[4];
unsigned shift;
switch (render_state.texture_mode)
@ -791,119 +786,16 @@ void Renderer::build_attribs(BufferVertex *output, const Vertex *vertices, unsig
break;
}
uint16_t off_u = 0;
uint16_t off_v = 0;
if (render_state.texture_mode != TextureMode::None)
{
// For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior.
// If U or V is decreasing in X or Y, and we use the provided U/V as is, we will sample the wrong texel as interpolation
// covers an entire pixel, while PSX samples its interpolation essentially in the top-left corner and splats that interpolant across the entire pixel.
// While we could emulate this reasonably well in native resolution by shifting our vertex coords by 0.5,
// this breaks in upscaling scenarios, because we have several samples per native sample and we need NN rules to hit the same UV every time.
// One approach here is to use interpolate at offset or similar tricks to generalize the PSX interpolation patterns,
// but the problem is that vertices sharing an edge will no longer see the same UV (due to different plane derivatives),
// we end up sampling outside the intended boundary and artifacts are inevitable, so the only case where we can apply this fixup is for "sprites"
// or similar which should not share edges, which leads to this unfortunate code below.
//
// Only apply this workaround for quads.
if (count == 4)
{
// It might be faster to do more direct checking here, but the code below handles primitives in any order
// and orientation, and is far more SIMD-friendly if needed.
float abx = vertices[1].x - vertices[0].x;
float aby = vertices[1].y - vertices[0].y;
float bcx = vertices[2].x - vertices[1].x;
float bcy = vertices[2].y - vertices[1].y;
float cax = vertices[0].x - vertices[2].x;
float cay = vertices[0].y - vertices[2].y;
// Compute static derivatives, just assume W is uniform across the primitive
// and that the plane equation remains the same across the quad.
float dudx = -aby * float(vertices[2].u) - bcy * float(vertices[0].u) - cay * float(vertices[1].u);
float dvdx = -aby * float(vertices[2].v) - bcy * float(vertices[0].v) - cay * float(vertices[1].v);
float dudy = +abx * float(vertices[2].u) + bcx * float(vertices[0].u) + cax * float(vertices[1].u);
float dvdy = +abx * float(vertices[2].v) + bcx * float(vertices[0].v) + cax * float(vertices[1].v);
float area = bcx * cay - bcy * cax;
// iCB: Detect and reject any triangles with 0 size texture area
float texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) - (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);
// Shouldn't matter as degenerate primitives will be culled anyways.
if ((area != 0.0f) && (texArea != 0.0f))
{
float inv_area = 1.0f / area;
dudx *= inv_area;
dudy *= inv_area;
dvdx *= inv_area;
dvdy *= inv_area;
bool neg_dudx = dudx < 0.0f;
bool neg_dudy = dudy < 0.0f;
bool neg_dvdx = dvdx < 0.0f;
bool neg_dvdy = dvdy < 0.0f;
bool zero_dudx = dudx == 0.0f;
bool zero_dudy = dudy == 0.0f;
bool zero_dvdx = dvdx == 0.0f;
bool zero_dvdy = dvdy == 0.0f;
// If we have negative dU or dV in any direction, increment the U or V to work properly with nearest-neighbor in this impl.
// If we don't have 1:1 pixel correspondence, this creates a slight "shift" in the sprite, but we guarantee that we don't sample garbage at least.
// Overall, this is kinda hacky because there can be legitimate, rare cases where 3D meshes hit this scenario, and a single texel offset can pop in, but
// this is way better than having borked 2D overall.
// TODO: Try to figure out if this can be generalized.
//
// TODO: If perf becomes an issue, we can probably SIMD the 8 comparisons above,
// create an 8-bit code, and use a LUT to get the offsets.
// Case 1: U is decreasing in X, but no change in Y.
// Case 2: U is decreasing in Y, but no change in X.
// Case 3: V is decreasing in X, but no change in Y.
// Case 4: V is decreasing in Y, but no change in X.
if (neg_dudx && zero_dudy)
off_u++;
else if (neg_dudy && zero_dudx)
off_u++;
if (neg_dvdx && zero_dvdy)
off_v++;
else if (neg_dvdy && zero_dvdx)
off_v++;
}
}
if (render_state.texture_window.mask_x == 0xffu && render_state.texture_window.mask_y == 0xffu)
{
// If we're not using texture window, we're likely accessing a small subset of the texture.
for (unsigned i = 0; i < count; i++)
{
min_u = min<unsigned>(min_u, vertices[i].u);
max_u = max<unsigned>(max_u, vertices[i].u);
min_v = min<unsigned>(min_v, vertices[i].v);
max_v = max<unsigned>(max_v, vertices[i].v);
}
min_u += off_u;
max_u += off_u;
min_v += off_v;
max_v += off_v;
// In nearest neighbor, we'll get *very* close to this UV, but not close enough to actually sample it.
// If du/dx or dv/dx are negative, we probably need to invert this though ...
if (max_u > min_u)
max_u--;
if (max_v > min_v)
max_v--;
// If there's no wrapping, we can prewrap and avoid fallback.
if ((max_u & 0xff00) == (min_u & 0xff00))
max_u &= 0xff;
if ((max_v & 0xff00) == (min_v & 0xff00))
max_v &= 0xff;
texture_limits[0] = min_u;
texture_limits[1] = min_v;
texture_limits[2] = max_u;
texture_limits[3] = max_v;
unsigned min_u = render_state.UVLimits.min_u;
unsigned min_v = render_state.UVLimits.min_v;
unsigned max_u = render_state.UVLimits.max_u;
unsigned max_v = render_state.UVLimits.max_v;
unsigned width = max_u - min_u + 1;
unsigned height = max_v - min_v + 1;
@ -968,8 +860,8 @@ void Renderer::build_attribs(BufferVertex *output, const Vertex *vertices, unsig
int16_t(render_state.palette_offset_x),
int16_t(render_state.palette_offset_y),
int16_t(shift | (render_state.dither << 8)),
int16_t(vertices[i].u + off_u),
int16_t(vertices[i].v + off_v),
int16_t(vertices[i].u),
int16_t(vertices[i].v),
int16_t(render_state.texture_offset_x),
int16_t(render_state.texture_offset_y),
};
@ -979,10 +871,10 @@ void Renderer::build_attribs(BufferVertex *output, const Vertex *vertices, unsig
output[i].color |= render_state.force_mask_bit ? 0xff000000u : 0u;
output[i].min_u = float(texture_limits[0]);
output[i].min_v = float(texture_limits[1]);
output[i].max_u = float(texture_limits[2]);
output[i].max_v = float(texture_limits[3]);
output[i].min_u = float(render_state.UVLimits.min_u);
output[i].min_v = float(render_state.UVLimits.min_v);
output[i].max_u = float(render_state.UVLimits.max_u);
output[i].max_v = float(render_state.UVLimits.max_v);
}
}

View File

@ -25,6 +25,11 @@ struct TextureWindow
uint8_t mask_x, mask_y, or_x, or_y;
};
struct UVRect
{
uint16_t min_u, min_v, max_u, max_v;
};
enum class SemiTransparentMode
{
None,
@ -59,6 +64,8 @@ public:
bool bpp24 = false;
bool dither = false;
bool adaptive_smoothing = true;
UVRect UVLimits;
};
struct SaveState
@ -162,6 +169,14 @@ public:
render_state.texture_color_modulate = enable;
}
inline void set_UV_limits(uint16_t min_u, uint16_t min_v, uint16_t max_u, uint16_t max_v)
{
render_state.UVLimits.min_u = min_u;
render_state.UVLimits.min_v = min_v;
render_state.UVLimits.max_u = max_u;
render_state.UVLimits.max_v = max_v;
}
// Draw commands
void clear_rect(const Rect &rect, FBColor color);
void draw_line(const Vertex *vertices);

View File

@ -357,6 +357,8 @@ void rsx_intf_push_triangle(
uint16_t t0x, uint16_t t0y,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -387,6 +389,7 @@ void rsx_intf_push_triangle(
#if defined(HAVE_OPENGL) || defined(HAVE_OPENGLES)
rsx_gl_push_triangle(p0x, p0y, p0w, p1x, p1y, p1w, p2x, p2y, p2w,
c0, c1, c2, t0x, t0y, t1x, t1y, t2x, t2y,
min_u, min_v, max_u, max_v,
texpage_x, texpage_y, clut_x, clut_y,
texture_blend_mode,
depth_shift,
@ -398,6 +401,7 @@ void rsx_intf_push_triangle(
#if defined(HAVE_VULKAN)
rsx_vulkan_push_triangle(p0x, p0y, p0w, p1x, p1y, p1w, p2x, p2y, p2w,
c0, c1, c2, t0x, t0y, t1x, t1y, t2x, t2y,
min_u, min_v, max_u, max_v,
texpage_x, texpage_y, clut_x, clut_y,
texture_blend_mode,
depth_shift,
@ -418,6 +422,8 @@ void rsx_intf_push_quad(
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t t3x, uint16_t t3y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -450,6 +456,7 @@ void rsx_intf_push_quad(
rsx_gl_push_quad(p0x, p0y, p0w, p1x, p1y, p1w, p2x, p2y, p2w, p3x, p3y, p3w,
c0, c1, c2, c3,
t0x, t0y, t1x, t1y, t2x, t2y, t3x, t3y,
min_u, min_v, max_u, max_v,
texpage_x, texpage_y, clut_x, clut_y,
texture_blend_mode,
depth_shift,
@ -462,6 +469,7 @@ void rsx_intf_push_quad(
rsx_vulkan_push_quad(p0x, p0y, p0w, p1x, p1y, p1w, p2x, p2y, p2w, p3x, p3y, p3w,
c0, c1, c2, c3,
t0x, t0y, t1x, t1y, t2x, t2y, t3x, t3y,
min_u, min_v, max_u, max_v,
texpage_x, texpage_y, clut_x, clut_y,
texture_blend_mode,
depth_shift,

View File

@ -55,6 +55,8 @@ void rsx_intf_push_triangle(float p0x, float p0y, float p0w,
uint32_t c0, uint32_t c1, uint32_t c2,
uint16_t t0x, uint16_t t0y,
uint16_t t1x, uint16_t t1y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t t2x, uint16_t t2y,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
@ -80,6 +82,8 @@ void rsx_intf_push_quad(float p0x, float p0y, float p0w,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t t3x, uint16_t t3y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,

View File

@ -163,10 +163,11 @@ struct CommandVertex {
uint8_t dither;
/* 0: primitive is opaque, 1: primitive is semi-transparent */
uint8_t semi_transparent;
/* Texture window mask/OR values */
uint8_t texture_window[4];
/* Texture limits of primitive */
uint16_t texture_limits[4];
/* Texture window mask/OR values */
uint8_t texture_window[4];
static std::vector<Attribute> attributes();
};
@ -1308,11 +1309,11 @@ static bool GlRenderer_new(GlRenderer *renderer, DrawConfig config)
if (dither_mode == DITHER_OFF)
{
/* Dithering is superfluous when we increase the internal
* color depth, but users asked for it */
DrawBuffer_disable_attribute(command_buffer, "dither");
* color depth, but users asked for it */
DrawBuffer_disable_attribute(command_buffer, "dither");
} else
{
DrawBuffer_enable_attribute(command_buffer, "dither");
DrawBuffer_enable_attribute(command_buffer, "dither");
}
GLenum command_draw_mode = wireframe ? GL_LINE : GL_FILL;
@ -1558,22 +1559,22 @@ static bool retro_refresh_variables(GlRenderer *renderer)
dither_mode dither_mode = DITHER_NATIVE;
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
if (!strcmp(var.value, "1x(native)"))
{
dither_mode = DITHER_NATIVE;
DrawBuffer_enable_attribute(renderer->command_buffer, "dither");
}
if (!strcmp(var.value, "1x(native)"))
{
dither_mode = DITHER_NATIVE;
DrawBuffer_enable_attribute(renderer->command_buffer, "dither");
}
else if (!strcmp(var.value, "internal resolution"))
{
dither_mode = DITHER_UPSCALED;
DrawBuffer_enable_attribute(renderer->command_buffer, "dither");
}
else if (!strcmp(var.value, "disabled"))
{
dither_mode = DITHER_OFF;
DrawBuffer_disable_attribute(renderer->command_buffer, "dither");
}
else if (!strcmp(var.value, "internal resolution"))
{
dither_mode = DITHER_UPSCALED;
DrawBuffer_enable_attribute(renderer->command_buffer, "dither");
}
else if (!strcmp(var.value, "disabled"))
{
dither_mode = DITHER_OFF;
DrawBuffer_disable_attribute(renderer->command_buffer, "dither");
}
}
var.key = BEETLE_OPT(wireframe);
@ -1590,7 +1591,7 @@ static bool retro_refresh_variables(GlRenderer *renderer)
if (rebuild_fb_out)
{
if (dither_mode == DITHER_OFF)
if (dither_mode == DITHER_OFF)
DrawBuffer_disable_attribute(renderer->command_buffer, "dither");
else
DrawBuffer_enable_attribute(renderer->command_buffer, "dither");
@ -1664,147 +1665,7 @@ static bool retro_refresh_variables(GlRenderer *renderer)
return reconfigure_frontend;
}
// GL-renderer-local UV preprocessing: computes the flipped-sprite one-texel
// UV offsets and the min/max UV clamping limits for a textured primitive,
// then bakes both into every CommandVertex. Only runs the analysis for
// textured primitives (texture_blend_mode != 0); untextured primitives get
// zero offsets and zero limits.
static void texCoord_preprocessing(
GlRenderer *renderer,
CommandVertex *vertices,
unsigned count)
{
// iCB: Just borrowing this from \parallel-psx\renderer\renderer.cpp
// UV bounding box starts empty: min at the maximum value, max at zero.
uint16_t min_u = UINT16_MAX;
uint16_t max_u = 0;
uint16_t min_v = UINT16_MAX;
uint16_t max_v = 0;
// One-texel offsets applied when U/V decrease across the quad.
uint16_t off_u = 0;
uint16_t off_v = 0;
if (vertices[0].texture_blend_mode != 0)
{
// For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior.
// If U or V is decreasing in X or Y, and we use the provided U/V as is, we will sample the wrong texel as interpolation
// covers an entire pixel, while PSX samples its interpolation essentially in the top-left corner and splats that interpolant across the entire pixel.
// While we could emulate this reasonably well in native resolution by shifting our vertex coords by 0.5,
// this breaks in upscaling scenarios, because we have several samples per native sample and we need NN rules to hit the same UV every time.
// One approach here is to use interpolate at offset or similar tricks to generalize the PSX interpolation patterns,
// but the problem is that vertices sharing an edge will no longer see the same UV (due to different plane derivatives),
// we end up sampling outside the intended boundary and artifacts are inevitable, so the only case where we can apply this fixup is for "sprites"
// or similar which should not share edges, which leads to this unfortunate code below.
//
// Only apply this workaround for quads.
if (count == 4)
{
// It might be faster to do more direct checking here, but the code below handles primitives in any order
// and orientation, and is far more SIMD-friendly if needed.
float abx = vertices[1].position[0] - vertices[0].position[0];
float aby = vertices[1].position[1] - vertices[0].position[1];
float bcx = vertices[2].position[0] - vertices[1].position[0];
float bcy = vertices[2].position[1] - vertices[1].position[1];
float cax = vertices[0].position[0] - vertices[2].position[0];
float cay = vertices[0].position[1] - vertices[2].position[1];
// Compute static derivatives, just assume W is uniform across the primitive
// and that the plane equation remains the same across the quad.
float dudx = -aby * float(vertices[2].texture_coord[0]) - bcy * float(vertices[0].texture_coord[0]) - cay * float(vertices[1].texture_coord[0]);
float dvdx = -aby * float(vertices[2].texture_coord[1]) - bcy * float(vertices[0].texture_coord[1]) - cay * float(vertices[1].texture_coord[1]);
float dudy = +abx * float(vertices[2].texture_coord[0]) + bcx * float(vertices[0].texture_coord[0]) + cax * float(vertices[1].texture_coord[0]);
float dvdy = +abx * float(vertices[2].texture_coord[1]) + bcx * float(vertices[0].texture_coord[1]) + cax * float(vertices[1].texture_coord[1]);
float area = bcx * cay - bcy * cax;
// iCB: Detect and reject any triangles with 0 size texture area
float texArea = (vertices[1].texture_coord[0] - vertices[0].texture_coord[0]) * (vertices[2].texture_coord[1] - vertices[0].texture_coord[1]) - (vertices[2].texture_coord[0] - vertices[0].texture_coord[0]) * (vertices[1].texture_coord[1] - vertices[0].texture_coord[1]);
// Shouldn't matter as degenerate primitives will be culled anyways.
if ((area != 0.0f) && (texArea != 0.0f))
{
float inv_area = 1.0f / area;
dudx *= inv_area;
dudy *= inv_area;
dvdx *= inv_area;
dvdy *= inv_area;
bool neg_dudx = dudx < 0.0f;
bool neg_dudy = dudy < 0.0f;
bool neg_dvdx = dvdx < 0.0f;
bool neg_dvdy = dvdy < 0.0f;
bool zero_dudx = dudx == 0.0f;
bool zero_dudy = dudy == 0.0f;
bool zero_dvdx = dvdx == 0.0f;
bool zero_dvdy = dvdy == 0.0f;
// If we have negative dU or dV in any direction, increment the U or V to work properly with nearest-neighbor in this impl.
// If we don't have 1:1 pixel correspondence, this creates a slight "shift" in the sprite, but we guarantee that we don't sample garbage at least.
// Overall, this is kinda hacky because there can be legitimate, rare cases where 3D meshes hit this scenario, and a single texel offset can pop in, but
// this is way better than having borked 2D overall.
// TODO: Try to figure out if this can be generalized.
//
// TODO: If perf becomes an issue, we can probably SIMD the 8 comparisons above,
// create an 8-bit code, and use a LUT to get the offsets.
// Case 1: U is decreasing in X, but no change in Y.
// Case 2: U is decreasing in Y, but no change in X.
// Case 3: V is decreasing in X, but no change in Y.
// Case 4: V is decreasing in Y, but no change in X.
if (neg_dudx && zero_dudy)
off_u++;
else if (neg_dudy && zero_dudx)
off_u++;
if (neg_dvdx && zero_dvdy)
off_v++;
else if (neg_dvdy && zero_dvdx)
off_v++;
}
}
if (renderer->tex_x_mask == 0xffu && renderer->tex_y_mask == 0xffu)
{
// If we're not using texture window, we're likely accessing a small subset of the texture.
for (unsigned i = 0; i < count; i++)
{
min_u = std::min(min_u, vertices[i].texture_coord[0]);
max_u = std::max(max_u, vertices[i].texture_coord[0]);
min_v = std::min(min_v, vertices[i].texture_coord[1]);
max_v = std::max(max_v, vertices[i].texture_coord[1]);
}
// Shift the limits by the same offsets applied to the vertex UVs below.
min_u += off_u;
max_u += off_u;
min_v += off_v;
max_v += off_v;
// In nearest neighbor, we'll get *very* close to this UV, but not close enough to actually sample it.
// If du/dx or dv/dx are negative, we probably need to invert this though ...
if (max_u > min_u)
max_u--;
if (max_v > min_v)
max_v--;
// If there's no wrapping, we can prewrap and avoid fallback.
if ((max_u & 0xff00) == (min_u & 0xff00))
max_u &= 0xff;
if ((max_v & 0xff00) == (min_v & 0xff00))
max_v &= 0xff;
}
else
{
// texture window so don't clamp texture
min_u = 0;
max_u = UINT16_MAX;
min_v = 0;
max_v = UINT16_MAX;
}
}
// Bake the computed offsets into the vertex UVs and attach the limits to
// every vertex of the primitive.
for (unsigned i = 0; i < count; i++)
{
vertices[i].texture_coord[0] += off_u;
vertices[i].texture_coord[1] += off_v;
vertices[i].texture_limits[0] = min_u;
vertices[i].texture_limits[1] = min_v;
vertices[i].texture_limits[2] = max_u;
vertices[i].texture_limits[3] = max_v;
}
}
static void vertex_preprocessing(
GlRenderer *renderer,
@ -1837,8 +1698,7 @@ static void vertex_preprocessing(
int16_t z = renderer->primitive_ordering;
renderer->primitive_ordering += 1;
texCoord_preprocessing(renderer, v, count);
for (unsigned i = 0; i < count; i++)
{
@ -2537,6 +2397,8 @@ void rsx_gl_push_quad( float p0x, float p0y, float p0w,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t t3x, uint16_t t3y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -2591,6 +2453,7 @@ void rsx_gl_push_quad( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{min_u, min_v, max_u, max_v},
},
{
{p1x, p1y, 0.95, p1w }, /* position */
@ -2602,6 +2465,7 @@ void rsx_gl_push_quad( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{min_u, min_v, max_u, max_v},
},
{
{p2x, p2y, 0.95, p2w }, /* position */
@ -2613,6 +2477,7 @@ void rsx_gl_push_quad( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{min_u, min_v, max_u, max_v},
},
{
{p3x, p3y, 0.95, p3w }, /* position */
@ -2624,6 +2489,7 @@ void rsx_gl_push_quad( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{ min_u, min_v, max_u, max_v },
},
};
@ -2652,6 +2518,8 @@ void rsx_gl_push_triangle( float p0x, float p0y, float p0w,
uint16_t t0x, uint16_t t0y,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -2705,6 +2573,7 @@ void rsx_gl_push_triangle( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{min_u, min_v, max_u, max_v},
},
{
{p1x, p1y, 0.95, p1w }, /* position */
@ -2716,6 +2585,7 @@ void rsx_gl_push_triangle( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{min_u, min_v, max_u, max_v},
},
{
{p2x, p2y, 0.95, p2w }, /* position */
@ -2727,6 +2597,7 @@ void rsx_gl_push_triangle( float p0x, float p0y, float p0w,
depth_shift,
(uint8_t) dither,
semi_transparent,
{min_u, min_v, max_u, max_v},
}
};

View File

@ -39,6 +39,8 @@ void rsx_gl_push_triangle( float p0x, float p0y, float p0w,
uint16_t t0x, uint16_t t0y,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -59,6 +61,8 @@ void rsx_gl_push_quad( float p0x, float p0y, float p0w,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t t3x, uint16_t t3y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,

View File

@ -361,6 +361,8 @@ void rsx_vulkan_push_quad(
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t t3x, uint16_t t3y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -377,6 +379,7 @@ void rsx_vulkan_push_quad(
renderer->set_dither(dither);
renderer->set_mask_test(mask_test);
renderer->set_force_mask_bit(set_mask);
renderer->set_UV_limits(min_u, min_v, max_u, max_v);
if (texture_blend_mode != 0)
{
switch (depth_shift)
@ -436,6 +439,8 @@ void rsx_vulkan_push_triangle(
uint16_t t0x, uint16_t t0y,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -452,6 +457,7 @@ void rsx_vulkan_push_triangle(
renderer->set_dither(dither);
renderer->set_mask_test(mask_test);
renderer->set_force_mask_bit(set_mask);
renderer->set_UV_limits(min_u, min_v, max_u, max_v);
if (texture_blend_mode != 0)
{
switch (depth_shift)

View File

@ -35,6 +35,8 @@ void rsx_vulkan_push_triangle(float p0x, float p0y, float p0w,
uint16_t t0x, uint16_t t0y,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,
@ -51,6 +53,8 @@ void rsx_vulkan_push_quad(float p0x, float p0y, float p0w,
uint16_t t1x, uint16_t t1y,
uint16_t t2x, uint16_t t2y,
uint16_t t3x, uint16_t t3y,
uint16_t min_u, uint16_t min_v,
uint16_t max_u, uint16_t max_v,
uint16_t texpage_x, uint16_t texpage_y,
uint16_t clut_x, uint16_t clut_y,
uint8_t texture_blend_mode,