Add ray traced curvature shader (#604)

* Ray-sphere intersection

* Implement spherical mapping with AA; Found mip-mapping bug

* Implement screen tilt

* Implement camera offset with tilt

* Clean up code a bit

* Implement perfect zoom; tweak params

* Add zero curvature

* Implement rounded corner

* Add aspect ratio forcing; Add bilinear filtering compile time switch

* Make rounded corner AA more generous

* Implement cylinder shape

* Implement ortho view

* Implement proper trilinear filtering

* Add LOD bias option

* Better sampling

* Initial cleanup

* Apply simplification that curvature_o = 0

* Compress code; Replace shape branching with multiplication

* Move stuff to the vert shader

* Inline update_frustum; Simplify cyl_ax = plane_v

* Separate out vertex shader

* Separate out ray tracing part of frag shader

* Make hq preset default

* Rename to rt curvature; Fix curv = 0 bug

* Fix rotated games; Finalize simplifications and comments

* Move to CRT folder; Add append preset

* Longer comment

* Add additional runtime switch for reducing aliasing when appending
This commit is contained in:
fishcu 2024-06-15 00:48:23 +02:00 committed by GitHub
parent e5b20b1422
commit 693c632bdd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 379 additions and 39 deletions

View File

@ -0,0 +1,10 @@
# Two-pass preset that does not set RT_CURV_APPEND (presumably the stand-alone
# variant - confirm against the preset file name).
shaders = 2
# Pass 0: stock shader at source scale, without linear filtering.
shader0 = ../stock.slang
filter_linear0 = false
scale_type0 = source
# Pass 1: ray traced curvature, rendered at viewport scale.
shader1 = shaders/rt_curvature/rt_curvature.slang
filter_linear1 = true
scale_type1 = viewport
# The curvature shader samples explicit LODs with textureLod(), so its input
# is requested with mip-maps.
mipmap_input1 = true

View File

@ -0,0 +1,9 @@
# Single-pass preset that enables RT_CURV_APPEND (labelled "for append" in the
# shader's parameter list).
shaders = 1
shader0 = shaders/rt_curvature/rt_curvature.slang
filter_linear0 = true
scale_type0 = viewport
# The curvature shader samples explicit LODs with textureLod(), so its input
# is requested with mip-maps.
mipmap_input0 = true
parameters = "RT_CURV_APPEND"
# Values >= 0.5 make the fragment stage skip its sharp pixel-art UV snapping.
RT_CURV_APPEND = 1.0

View File

@ -0,0 +1,21 @@
// See the main shader file for copyright and other information.
// User-tunable parameters of the ray traced curvature shader.
// Each line reads: identifier, UI label, then four numbers which, per the
// slang "#pragma parameter" convention, are default, minimum, maximum, step.
// clang-format off
// Header row only; RT_CURV_SETTINGS is not read by the shader code.
#pragma parameter RT_CURV_SETTINGS "=== Ray Traced Curvature v1.0 settings ===" 0.0 0.0 1.0 1.0
// Values <= 1.0e-3 are treated as a flat screen by the shader.
#pragma parameter RT_CURV_CURVATURE "Curvature strength" 0.5 0.0 1.5 0.05
// Tilt values are fed directly to sin() / cos(), i.e. they are in radians.
#pragma parameter RT_CURV_TILT_ANGLE_H "Horizontal tilt" 0.0 -0.5 0.5 0.01
#pragma parameter RT_CURV_TILT_ANGLE_V "Vertical tilt" -0.08 -0.5 0.5 0.01
#pragma parameter RT_CURV_ROUNDED_CORNER "Rounded corner radius" 0.02 0.0 0.2 0.01
// Used as a multiplier in the ray math, so it must be exactly 0 or 1.
#pragma parameter RT_CURV_SHAPE "Screen shape (0 = sphere, 1 = cylinder)" 0.0 0.0 1.0 1.0
// The forced aspect ratio only applies when both values are > 0.
#pragma parameter RT_CURV_ASPECT_H "Horizontal aspect ratio (0 = unchanged)" 0.0 0.0 256.0 1.0
#pragma parameter RT_CURV_ASPECT_V "Vertical aspect ratio (0 = unchanged)" 0.0 0.0 256.0 1.0
#pragma parameter RT_CURV_ZOOM "Zoom" 0.99 0.8 1.2 0.01
// Focal length at or above which generate_ray() uses the orthographic camera
// instead of the perspective one. Keep equal to the maximum of RT_CURV_F.
#define RT_CURV_F_MAX 10.0
#pragma parameter RT_CURV_F "Focal Length (max = ortho)" 2.0 0.5 10.0 0.25
// Added to the mip level used when sampling the source texture.
#pragma parameter RT_CURV_LOD_BIAS "Anti-aliasing boost" 0.0 0.0 1.0 0.05
// Values >= 0.5 disable the sharp pixel-art sampling path at run time.
#pragma parameter RT_CURV_APPEND "Disable pixel art sampling (for append)" 0.0 0.0 1.0 1.0
// clang-format on

View File

@ -0,0 +1,175 @@
#version 450
/*
Ray traced curvature v1.0 by fishku
Copyright (C) 2024
Public domain license (CC0)
This example demoes the following:
- How to generate rays for tracing against the screen surface in the vertex
shader.
- How to trace the rays in the fragment shader.
- How to achieve high-quality sampling with the found UV.
Changelog:
v1.0: Initial release.
*/
// If uncommented, pixel art is sampled sharply, better for stand-alone usage.
// If commented out, no coordinate distortion is done, which is better for
// appending. Enabling this option may give stronger aliasing artifacts when
// appending to other presets.
#define SHARP_PIXEL_SAMPLING
// If uncommented, mip-map levels are blended, if available, giving better AA.
#define TRILINEAR_SAMPLING
#include "../../../misc/shaders/input_transform/rotation.inc"
#include "parameters.inc"
#include "shared.inc"
// Push constants: frame sizes, rotation, and one float per "#pragma parameter"
// that the shader stages read (see parameters.inc for ranges and defaults).
layout(push_constant) uniform Push {
// Size of the original input; .xy provides the default input aspect ratio.
vec4 OriginalSize;
// Size of this pass' source texture; the fragment stage multiplies by .xy to
// go from UV to texels and by .zw to go from texels back to UV.
vec4 SourceSize;
// Size of the output; .xy provides the output aspect ratio, .zw is used as the
// anti-aliasing width of the rounded corner.
vec4 OutputSize;
// Rotation index handed to the get_rotated_*() helpers (0..3 have dedicated
// cases there).
uint Rotation;
float RT_CURV_ASPECT_H;
float RT_CURV_ASPECT_V;
float RT_CURV_SHAPE;
float RT_CURV_CURVATURE;
float RT_CURV_ZOOM;
float RT_CURV_TILT_ANGLE_H;
float RT_CURV_TILT_ANGLE_V;
float RT_CURV_F;
float RT_CURV_ROUNDED_CORNER;
float RT_CURV_LOD_BIAS;
float RT_CURV_APPEND;
}
param;
// Uniform buffer holding the matrix applied to the vertex position.
layout(std140, set = 0, binding = 0) uniform UBO { mat4 MVP; }
global;
#pragma stage vertex
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec3 prim_ray_o;
layout(location = 1) out vec3 prim_ray_d;
layout(location = 2) out vec3 plane_n;
layout(location = 3) out vec3 plane_u;
layout(location = 4) out vec3 plane_v;
layout(location = 5) out vec2 input_aspect;

// Vertex stage: sets up the tilted input plane, the input aspect ratio and the
// per-vertex primary camera ray, all of which are interpolated for the
// fragment stage.
void main() {
    gl_Position = global.MVP * Position;

    // Radius of the curved screen (sphere or cylinder). The curved object is
    // assumed to be centred at the origin. A (near-)zero curvature falls back
    // to a unit radius, which only positions the flat plane.
    float r = 1.0;
    if (param.RT_CURV_CURVATURE > 1.0e-3) {
        r = 1.0 / param.RT_CURV_CURVATURE;
    }

    // TODO: Why does this need an extra flip compared to the usage in
    // input_transform.inc?
    // The sign is +1 for even rotation indices and -1 for odd ones.
    const int rot_flip = 1 - 2 * (int(param.Rotation) % 2);
    const vec2 user_tilt =
        vec2(param.RT_CURV_TILT_ANGLE_H, param.RT_CURV_TILT_ANGLE_V);
    const vec2 tilt = get_rotated_vector(user_tilt, param.Rotation) * rot_flip;
    const float sin_h = sin(tilt.x);
    const float cos_h = cos(tilt.x);
    const float sin_v = sin(tilt.y);
    const float cos_v = cos(tilt.y);

    // Input texture plane. The cylinder axis is assumed to equal plane_v.
    // Untilted basis:
    //   n = (0, 0, -r)   (also serves as the origin of the plane)
    //   u = (1, 0, 0)
    //   v = (0, 1, 0)
    // The basis is rotated about the x axis by tilt.y first and then about the
    // y axis by tilt.x, which is why plane_u has no y component.
    plane_n = vec3(r * sin_h * cos_v, -r * sin_v, -r * cos_h * cos_v);
    plane_u = vec3(cos_h, 0.0, sin_h);
    plane_v = vec3(sin_h * sin_v, cos_v, -cos_h * sin_v);

    // Input aspect ratio, normalised so that the longer side is 1. A forced
    // ratio is used only when both user values are positive; it is rotated to
    // follow the content rotation.
    const bool force_aspect =
        param.RT_CURV_ASPECT_H > 0.0 && param.RT_CURV_ASPECT_V > 0.0;
    if (force_aspect) {
        const vec2 forced =
            vec2(param.RT_CURV_ASPECT_H, param.RT_CURV_ASPECT_V);
        input_aspect = get_rotated_size(
            forced / max(param.RT_CURV_ASPECT_H, param.RT_CURV_ASPECT_V),
            param.Rotation);
    } else {
        input_aspect = param.OriginalSize.xy /
                       max(param.OriginalSize.x, param.OriginalSize.y);
    }

    // The output aspect ratio does not need to be rotated since RA takes care
    // of output rotation.
    const vec2 output_aspect =
        param.OutputSize.xy / max(param.OutputSize.x, param.OutputSize.y);

    // plane_n doubles as the plane origin (plane_o = plane_n).
    generate_ray(TexCoord, input_aspect, output_aspect, plane_n, plane_u,
                 plane_v, param.RT_CURV_CURVATURE, param.RT_CURV_F,
                 param.RT_CURV_SHAPE, param.RT_CURV_ZOOM, prim_ray_o,
                 prim_ray_d);
}
#pragma stage fragment
layout(location = 0) in vec3 prim_ray_o;
layout(location = 1) in vec3 prim_ray_d;
layout(location = 2) in vec3 plane_n;
layout(location = 3) in vec3 plane_u;
layout(location = 4) in vec3 plane_v;
layout(location = 5) in vec2 input_aspect;
layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 2) uniform sampler2D Source;

// Fragment stage: traces the interpolated camera ray to an input UV, samples
// the source there with anti-aliasing, and darkens everything outside the
// rounded-corner screen rectangle.
void main() {
    vec2 uv = trace_ray(input_aspect, prim_ray_o, prim_ray_d, plane_n, plane_u,
                        plane_v, param.RT_CURV_CURVATURE, param.RT_CURV_SHAPE);

    // Rounded corner mask: signed distance to a rounded rectangle covering the
    // UV range [0, 1], faded over the larger of OutputSize.z and OutputSize.w.
    const float corner_r = param.RT_CURV_ROUNDED_CORNER;
    const vec2 q = input_aspect * (abs(uv - 0.5) - 0.5) + corner_r;
    const float rounded_rect_dist =
        min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - corner_r;
    const float aa_width = max(param.OutputSize.z, param.OutputSize.w);
    const float corner_darkening =
        smoothstep(-aa_width, 0.0, -rounded_rect_dist);

    // Sampling section follows.
#if defined(SHARP_PIXEL_SAMPLING) || defined(TRILINEAR_SAMPLING)
    // Screen-space derivatives of the traced UV, in texels. They are taken
    // before the UV is snapped below.
    const vec2 duv_dx = dFdx(uv) * param.SourceSize.xy;
    const vec2 duv_dy = dFdy(uv) * param.SourceSize.xy;
#endif

#ifdef SHARP_PIXEL_SAMPLING
    // Sharp "pixel art" sampling, following:
    // https://www.youtube.com/watch?v=d6tp43wZqps
    // Skipped at run time when appending, to avoid aliasing.
    if (param.RT_CURV_APPEND < 0.5) {
        const vec2 footprint = clamp(abs(duv_dx) + abs(duv_dy), 1.0e-6, 1.0);
        const vec2 texel = uv * param.SourceSize.xy - 0.5 * footprint;
        const vec2 blend = smoothstep(1 - footprint, vec2(1.0), fract(texel));
        uv = (floor(texel) + 0.5 + blend) * param.SourceSize.zw;
    }
#endif

    vec3 color;
#ifdef TRILINEAR_SAMPLING
    // Anisotropic trilinear filtering, done in software because the current
    // implementation is broken. See:
    // https://github.com/libretro/RetroArch/issues/16567
    const float max_grad_sq = max(dot(duv_dx, duv_dx), dot(duv_dy, duv_dy));
    const float lambda_base =
        max(0.0, 0.5 * log2(max_grad_sq)) + param.RT_CURV_LOD_BIAS;
    float lod_lo;
    const float lod_frac = modf(lambda_base, lod_lo);
    const vec3 sample_lo = textureLod(Source, uv, lod_lo).rgb;
    const vec3 sample_hi = textureLod(Source, uv, lod_lo + 1.0).rgb;
    color = mix(sample_lo, sample_hi, lod_frac);
#else
    color = textureLod(Source, uv, param.RT_CURV_LOD_BIAS).rgb;
#endif

    // Apply rounded corner darkening.
    FragColor = vec4(color * corner_darkening, 1.0);
}

View File

@ -0,0 +1,122 @@
// See the main shader file for copyright and other information.
// Returns the intersection point of two 2D lines. Each line is given as
// (normal.x, normal.y, offset), i.e. the set of points p with
// dot(normal, p) == offset. Solved with Cramer's rule.
// The lines are assumed not to be parallel; no check is made.
vec2 line_intersection(vec3 l1, vec3 l2) {
    const float det = l1.x * l2.y - l2.x * l1.y;
    const float inv_det = 1.0 / det;
    const float num_x = l2.y * l1.z - l1.y * l2.z;
    const float num_y = l1.x * l2.z - l2.x * l1.z;
    return vec2(num_x * inv_det, num_y * inv_det);
}
// Builds the primary camera ray for the vertex at tex_coord.
//
// The screen is sampled at a 3x3 grid of points (corners, edge midpoints and
// centre) spanning the input image on the plane (plane_o, plane_u, plane_v).
// When curv > 1.0e-3, each sample is projected onto the sphere (shape = 0) or
// cylinder (shape = 1, axis plane_v) of radius 1 / curv centred at the origin.
// The camera is then placed so that all samples fit the view.
//
// Parameters:
//   tex_coord      Vertex texture coordinate; 0.5 maps to the view centre.
//   input_aspect   Per-axis extent of the input image on the plane.
//   output_aspect  Per-axis extent of the output image.
//   plane_o        Origin (centre) of the input plane.
//   plane_u/v      Basis vectors of the input plane.
//   curv           Curvature; values <= 1.0e-3 mean a flat screen.
//   f              Focal length; f >= RT_CURV_F_MAX selects the orthographic
//                  camera.
//   shape          Must be exactly 0 (sphere) or 1 (cylinder); it is used as a
//                  multiplier instead of a branch.
//   zoom           Zoom factor; larger values bring the camera closer
//                  (perspective) or shrink the viewed area (orthographic).
//   prim_ray_o     Output: ray origin.
//   prim_ray_d     Output: ray direction (not normalised).
//
// The two results are declared `out` rather than `inout`: they are written on
// every path and never read, so there is no reason to copy in the caller's
// not-yet-written values.
void generate_ray(vec2 tex_coord, vec2 input_aspect, vec2 output_aspect,
                  vec3 plane_o, vec3 plane_u, vec3 plane_v, float curv, float f,
                  float shape, float zoom, out vec3 prim_ray_o,
                  out vec3 prim_ray_d) {
    // Four 2D lines (normal in .xy, offset in .z), two in the xz plane and two
    // in the yz plane. Their normals are perpendicular to the outermost
    // frustum rays, so each line is parallel to one frustum edge. The offset
    // is lowered to the minimum over all samples, which slides each line until
    // it touches the sample set.
    vec3 half_spaces[4] = {vec3(f, 0.5 * output_aspect.x, 1.0e7),
                           vec3(-f, 0.5 * output_aspect.x, 1.0e7),
                           vec3(f, 0.5 * output_aspect.y, 1.0e7),
                           vec3(-f, 0.5 * output_aspect.y, 1.0e7)};
    // Axis-aligned bounds of the samples, used by the orthographic camera and
    // for the zoom pivot depth.
    vec3 p_min = vec3(1.0e7);
    vec3 p_max = vec3(-1.0e7);
    for (int i = -1; i < 2; ++i) {
        for (int j = -1; j < 2; ++j) {
            const vec2 uv = vec2(i * 0.5, j * 0.5) * input_aspect;
            vec3 p = plane_o + uv.x * plane_u + uv.y * plane_v;
            if (curv > 1.0e-3) {
                // Project the plane point onto the curved surface.
                // Simplification: Assume shape = 0 for sphere, = 1
                // for cylinder. This allows multiplication instead of
                // branching.
                // Simplification: Assume cylinder axis == plane_v.
                const vec3 p_on_ax = shape * dot(p, plane_v) * plane_v;
                p = p_on_ax + normalize(p - p_on_ax) / curv;
            }
            half_spaces[0].z =
                min(half_spaces[0].z, dot(half_spaces[0].xy, p.xz));
            half_spaces[1].z =
                min(half_spaces[1].z, dot(half_spaces[1].xy, p.xz));
            half_spaces[2].z =
                min(half_spaces[2].z, dot(half_spaces[2].xy, p.yz));
            half_spaces[3].z =
                min(half_spaces[3].z, dot(half_spaces[3].xy, p.yz));
            p_min = min(p_min, p);
            p_max = max(p_max, p);
        }
    }
    // Generate camera ray.
    if (f < RT_CURV_F_MAX) {
        // Perspective camera. Each pair of lines meets at the frustum apex
        // that just contains the samples along that axis; the smaller z of
        // the two apexes is used so that both axes fit.
        const vec2 i_xz = line_intersection(half_spaces[0], half_spaces[1]);
        const vec2 i_yz = line_intersection(half_spaces[2], half_spaces[3]);
        const float ideal_cam_z = min(i_xz[1], i_yz[1]);
        // Zoom scales the camera distance measured from the nearest sample
        // depth.
        prim_ray_o =
            vec3(i_xz[0], i_yz[0], p_min.z + (ideal_cam_z - p_min.z) / zoom);
        prim_ray_d = vec3((tex_coord - 0.5) * output_aspect, f);
    } else {
        // Orthographic camera: parallel rays along +z whose origins are spread
        // over the sample bounds, starting one unit in front of the nearest
        // sample.
        const vec3 p_extent = p_max - p_min;
        const vec2 p_center = 0.5 * (p_min.xy + p_max.xy);
        prim_ray_o = vec3(p_center + (tex_coord - 0.5) * output_aspect *
                                         max(p_extent.x / output_aspect.x,
                                             p_extent.y / output_aspect.y) /
                                         zoom,
                          p_min.z - 1.0);
        prim_ray_d = vec3(0.0, 0.0, 1.0);
    }
}
// Traces a primary ray against the screen and returns the input UV it hits.
//
// For a curved screen (curv > 1.0e-3) the ray is first intersected with the
// sphere (shape = 0) or cylinder (shape = 1, axis plane_v) of radius 1 / curv
// centred at the origin. From the hit point a secondary ray along the surface
// normal direction is intersected with the input plane. For a flat screen the
// primary ray is intersected with the plane directly.
//
// plane_n is both the plane normal (scaled) and the plane origin. The result
// is vec2(-1.0) when the ray misses the curved surface; otherwise it is the UV
// with 0.5 at the plane origin.
vec2 trace_ray(vec2 input_aspect, vec3 prim_ray_o, vec3 prim_ray_d,
               vec3 plane_n, vec3 plane_u, vec3 plane_v, float curv,
               float shape) {
    vec3 hit_o = prim_ray_o;
    vec3 hit_d = prim_ray_d;

    if (curv > 1.0e-3) {
        // Remove the component along the cylinder axis (a no-op for the
        // sphere, where shape = 0) and solve the resulting quadratic
        // a t^2 + 2 half_b t + c = 0.
        const vec3 dir_perp =
            prim_ray_d - shape * dot(prim_ray_d, plane_v) * plane_v;
        const vec3 org_perp =
            prim_ray_o - shape * dot(prim_ray_o, plane_v) * plane_v;
        const float a = dot(dir_perp, dir_perp);
        const float half_b = dot(dir_perp, org_perp);
        const float c = dot(org_perp, org_perp) - 1.0 / (curv * curv);
        const float discriminant = half_b * half_b - a * c;
        if (discriminant < 0.0) {
            // Ray misses screen surface entirely.
            return vec2(-1.0);
        }

        // Only the smaller root is needed. It is computed as c / q, see:
        // https://www.av8n.com/physics/quadratic-formula.htm
        // This assumes half_b < 0 and a positive solution.
        const float t_near = c / (sqrt(discriminant) - half_b);
        hit_o = prim_ray_o + t_near * prim_ray_d;
        // Secondary ray direction: from the axis (or centre) through the hit
        // point.
        hit_d = hit_o - shape * dot(hit_o, plane_v) * plane_v;
    }

    // Intersect the (secondary) ray with the plane through plane_n with normal
    // plane_n. Assumes the ray is not parallel to the plane and that the
    // intersection lies in front of the ray origin.
    const float t_plane = dot(plane_n - hit_o, plane_n) / dot(plane_n, hit_d);
    const vec3 on_plane = hit_o + t_plane * hit_d - plane_n;

    // Express the plane offset in the plane basis, scaled to UV.
    const float u = dot(on_plane, plane_u / input_aspect.x);
    const float v = dot(on_plane, plane_v / input_aspect.y);
    return vec2(u, v) + 0.5;
}

View File

@ -30,45 +30,7 @@
v1.0: Initial conversion from blur_fill release. Add rotation support. v1.0: Initial conversion from blur_fill release. Add rotation support.
*/ */
vec2 get_rotated_size(vec2 x, uint rotation) { #include "rotation.inc"
switch (rotation) {
case 0:
case 2:
default:
return x;
case 1:
case 3:
return x.yx;
}
}
vec4 get_rotated_crop(vec4 crop, uint rotation) {
switch (rotation) {
case 0:
default:
return crop;
case 1:
return crop.yzwx;
case 2:
return crop.zwxy;
case 3:
return crop.wxyz;
}
}
vec2 get_rotated_vector(vec2 x, uint rotation) {
switch (rotation) {
case 0:
default:
return x;
case 1:
return vec2(-x.y, x.x);
case 2:
return -x;
case 3:
return vec2(x.y, -x.x);
}
}
// Get 2 corners of input in texel space, spanning the input image. // Get 2 corners of input in texel space, spanning the input image.
// corners.x and .y define the top-left corner, corners.z and .w define the // corners.x and .y define the top-left corner, corners.z and .w define the

View File

@ -0,0 +1,41 @@
// See input_transform.inc for copyright and other information.
// Returns the size x as seen after applying the given rotation index:
// width and height are swapped for rotation 1 and 3, and left unchanged for
// every other value.
vec2 get_rotated_size(vec2 x, uint rotation) {
    const bool swap_axes = rotation == 1u || rotation == 3u;
    return swap_axes ? x.yx : x;
}
// Returns the four crop values cyclically shifted by the rotation index:
// one step for rotation 1, two for rotation 2, three for rotation 3. Any other
// value returns crop unchanged.
vec4 get_rotated_crop(vec4 crop, uint rotation) {
    if (rotation == 1u) {
        return crop.yzwx;
    }
    if (rotation == 2u) {
        return crop.zwxy;
    }
    if (rotation == 3u) {
        return crop.wxyz;
    }
    return crop;
}
// Returns the vector x turned by rotation quarter turns:
//   1 -> (-x.y, x.x), 2 -> -x, 3 -> (x.y, -x.x).
// Any other value returns x unchanged.
vec2 get_rotated_vector(vec2 x, uint rotation) {
    if (rotation == 1u) {
        return vec2(-x.y, x.x);
    }
    if (rotation == 2u) {
        return -x;
    }
    if (rotation == 3u) {
        return vec2(x.y, -x.x);
    }
    return x;
}