Fishcu/pixel aa xform (#585)

* Upstream optimizations from GLSL; split out vert shader

* Revert splitting of pix aa vert shader; Implement xform variant
This commit is contained in:
fishcu 2024-05-02 02:00:04 +02:00 committed by GitHub
parent f047ae72aa
commit 3378d01600
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 219 additions and 55 deletions

View File

@ -1,7 +1,7 @@
#version 450
/*
Blur fill v1.8 by fishku
Blur fill v1.9 by fishku
Copyright (C) 2023
Public domain license (CC0)
@ -105,7 +105,7 @@ void main() {
vec2(param.FORCE_INTEGER_SCALING_H, param.FORCE_INTEGER_SCALING_V),
param.OVERSCALE,
/* output_size_is_final_viewport_size = */ false);
vec2 shift = vec2(param.SHIFT_H, param.SHIFT_V);
const vec2 shift = vec2(param.SHIFT_H, param.SHIFT_V);
tx_coord = o2i(vTexCoord, param.InputSize.xy, crop, shift, param.Rotation,
param.CENTER_AFTER_CROPPING, scale_o2i);
tx_per_px = scale_o2i * param.OutputSize.zw;

View File

@ -0,0 +1,5 @@
shaders = 1
shader0 = shaders/pixel_aa/pixel_aa_xform.slang
filter_linear0 = true
scale_type0 = viewport

View File

@ -1,7 +1,7 @@
// See pixel_aa.slang for copyright and other information.
// User-facing parameters of the Pixel AA shader.
// Format: #pragma parameter ID "description" initial minimum maximum step
// Each ID is declared exactly once: a second declaration of the same ID with
// a different description makes the parameter list ambiguous.
// clang-format off
#pragma parameter PIX_AA_SETTINGS "=== Pixel AA v1.5 settings ===" 0.0 0.0 1.0 1.0
#pragma parameter PIX_AA_SHARP "Pixel AA sharpening amount" 1.5 0.0 2.0 0.05
#pragma parameter PIX_AA_GAMMA "Enable gamma-correct blending" 1.0 0.0 1.0 1.0
#pragma parameter PIX_AA_SUBPX "Enable subpixel AA" 0.0 0.0 1.0 1.0

View File

@ -1,7 +1,7 @@
#version 450
/*
Pixel AA v1.4 by fishku
Pixel AA v1.5 by fishku
Copyright (C) 2023
Public domain license (CC0)
@ -24,6 +24,7 @@
subpixel anti-aliasing, results are identical to the "pixellate" shader.
Changelog:
v1.5: Upstream optimizations from GLSL port. Add free transform preset.
v1.4: Enable subpixel sampling for all four pixel layout orientations,
including rotated screens.
v1.3: Account for screen rotation in subpixel sampling.

View File

@ -0,0 +1,100 @@
#version 450
// See pixel_aa.slang for copyright and other information.
// clang-format off
#include "../../../misc/shaders/input_transform/parameters.inc"
#include "parameters.inc"
#include "shared.inc"
#include "../../../misc/shaders/input_transform/input_transform.inc"
// clang-format on
// Push-constant block holding the per-pass built-ins and all user parameters
// read by this shader. Accessed through the instance name `param`.
layout(push_constant) uniform Push {
// Size of the input texture. `.xy` is passed to the input transform helpers
// as the input size; `.zw` is used as the texel-to-UV factor (tx_to_uv).
vec4 SourceSize;
// Size of the output. `.xy` is passed to get_scale_o2i(); `.zw` scales the
// output-to-input factor into texels per output pixel (tx_per_px).
vec4 OutputSize;
// Screen rotation index, forwarded to the input transform helpers and to
// pixel_aa().
uint Rotation;
// Own settings
// Forwarded to pixel_aa() as the sharpness argument.
float PIX_AA_SHARP;
// Treated as a boolean toggle: enabled when > 0.5.
float PIX_AA_GAMMA;
// Treated as a boolean toggle: enabled when > 0.5.
float PIX_AA_SUBPX;
// Converted to uint and forwarded to pixel_aa() as the subpixel orientation.
float PIX_AA_SUBPX_ORIENTATION;
// From input transform library, scaling section
float FORCE_ASPECT_RATIO;
// ASPECT_H / ASPECT_V are passed together as one vec2.
float ASPECT_H;
float ASPECT_V;
// FORCE_INTEGER_SCALING_H / _V are passed together as one vec2.
float FORCE_INTEGER_SCALING_H;
float FORCE_INTEGER_SCALING_V;
float OVERSCALE;
// From input transform library, cropping section
// NOTE: the vertex stage packs these as (top, left, bottom, right), which
// differs from the declaration order below.
float OS_CROP_TOP;
float OS_CROP_BOTTOM;
float OS_CROP_LEFT;
float OS_CROP_RIGHT;
// From input transform library, moving section
// SHIFT_H / SHIFT_V are passed together as one vec2.
float SHIFT_H;
float SHIFT_V;
float CENTER_AFTER_CROPPING;
}
param;
// Uniform buffer providing the model-view-projection matrix that the vertex
// stage applies to the incoming vertex position. Accessed as `global.MVP`.
layout(std140, set = 0, binding = 0) uniform UBO { mat4 MVP; }
global;
// ---- Vertex stage ----
#pragma stage vertex
// Vertex attributes.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
// Values handed to the fragment stage; the locations match the fragment
// stage inputs of the same names.
// tx_coord: result of o2i() for this vertex (coordinate in input texels).
layout(location = 0) out vec2 tx_coord;
// tx_per_px: output-to-input scale multiplied by OutputSize.zw.
layout(location = 1) out vec2 tx_per_px;
// tx_to_uv: SourceSize.zw, used to turn texel coordinates into UVs.
layout(location = 2) out vec2 tx_to_uv;
// input_corners: result of get_input_corners(); the fragment stage uses .xy
// and .zw as the lower and upper bounds of the visible input area.
layout(location = 3) out vec4 input_corners;
// Vertex entry point: transforms the vertex and precomputes, once per vertex,
// everything the fragment stage needs to sample the transformed input.
void main() {
    gl_Position = global.MVP * Position;

    // Crop amounts are packed in the order (top, left, bottom, right).
    const vec4 crop_tlbr = vec4(param.OS_CROP_TOP, param.OS_CROP_LEFT,
                                param.OS_CROP_BOTTOM, param.OS_CROP_RIGHT);
    const vec2 input_size = param.SourceSize.xy;
    const vec2 forced_aspect = vec2(param.ASPECT_H, param.ASPECT_V);
    const vec2 force_integer = vec2(param.FORCE_INTEGER_SCALING_H,
                                    param.FORCE_INTEGER_SCALING_V);

    // Scale factor of the output-to-input mapping, as computed by the input
    // transform library from the scaling / cropping settings.
    const vec2 out_to_in_scale = get_scale_o2i(
        input_size, param.OutputSize.xy, crop_tlbr, param.Rotation,
        param.CENTER_AFTER_CROPPING, param.FORCE_ASPECT_RATIO, forced_aspect,
        force_integer, param.OVERSCALE,
        /* output_size_is_final_viewport_size = */ false);

    // Outputs that depend only on uniforms.
    tx_to_uv = param.SourceSize.zw;
    tx_per_px = out_to_in_scale * param.OutputSize.zw;
    input_corners = get_input_corners(input_size, crop_tlbr, param.Rotation);

    // Map this vertex's output coordinate into input texel coordinates.
    tx_coord = o2i(TexCoord, input_size, crop_tlbr,
                   vec2(param.SHIFT_H, param.SHIFT_V), param.Rotation,
                   param.CENTER_AFTER_CROPPING, out_to_in_scale);
}
// ---- Fragment stage ----
#pragma stage fragment
// Interpolated values from the vertex stage; locations match the vertex
// stage outputs of the same names.
layout(location = 0) in vec2 tx_coord;
layout(location = 1) in vec2 tx_per_px;
layout(location = 2) in vec2 tx_to_uv;
layout(location = 3) in vec4 input_corners;
// Final color written by this pass.
layout(location = 0) out vec4 FragColor;
// Input texture sampled by pixel_aa().
layout(set = 0, binding = 2) uniform sampler2D Source;
// Component-wise trapezoid window; the result lies in [0, 1].
//   x: position to evaluate
//   l: lower center point of transition
//   u: upper center point of transition
//   s: transition width
vec2 trapezoid(vec2 x, vec2 l, vec2 u, vec2 s) {
    // Height of the unclamped profile at the midpoint between l and u.
    const vec2 peak = s + u - l;
    // Twice the distance of x from that midpoint.
    const vec2 spread = abs(2.0 * x - u - l);
    return clamp((peak - spread) / (2.0 * s), 0.0, 1.0);
}
// Fragment entry point: anti-aliased sample of the transformed input, faded
// to black outside the visible input area.
void main() {
    // The float parameters act as toggles / enum values.
    const bool gamma_correct = param.PIX_AA_GAMMA > 0.5;
    const bool sample_subpx = param.PIX_AA_SUBPX > 0.5;
    const uint subpx_orientation = uint(param.PIX_AA_SUBPX_ORIENTATION);

    // Per-axis coverage of the input area, with a transition that is
    // tx_per_px wide around each corner coordinate.
    const vec2 coverage = trapezoid(tx_coord, input_corners.xy,
                                    input_corners.zw, tx_per_px);

    FragColor = pixel_aa(Source, tx_per_px, tx_to_uv, tx_coord,
                         param.PIX_AA_SHARP, gamma_correct, sample_subpx,
                         subpx_orientation, param.Rotation);

    // Blend with background.
    // Gamma correctness is ignored.
    FragColor.rgb *= coverage.x * coverage.y;
}

View File

@ -9,77 +9,135 @@
const T o = (1.0 + s) * 0.5; \
return o - 0.5 * s * pow(2.0 * (o - s * x), T(slope)); \
}
INSTANTIATE_SLOPESTEP(float)
INSTANTIATE_SLOPESTEP(vec2)
// Gamma helpers using a plain power curve with exponent 2.2.
// to_lin: display-encoded value -> linear value.
// to_srgb: linear value -> display-encoded value.
float to_lin(float x) {
    const float exponent = 2.2;
    return pow(x, exponent);
}

vec3 to_lin(vec3 x) {
    const float exponent = 2.2;
    return pow(x, vec3(exponent));
}

float to_srgb(float x) {
    const float inv_exponent = 1.0 / 2.2;
    return pow(x, inv_exponent);
}

vec3 to_srgb(vec3 x) {
    const float inv_exponent = 1.0 / 2.2;
    return pow(x, vec3(inv_exponent));
}
// Function to get a single sample using the "pixel AA" method.
// Params:
//   tex:           Texture to sample
//   tx_per_px:     Size of one output pixel, measured in texels
//   tx_to_uv:      Factor converting texel coordinates to UV coordinates
//   tx_coord:      Coordinate in source pixel (texel) coordinates
//   sharpness:     Values < 1.0 blend towards bilinear filtering; values
//                  above 1.0 are passed to slopestep() as the slope
//   gamma_correct: Blend the four neighbouring texels in linear light
// Returns the filtered RGB color.
vec3 sample_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord,
               float sharpness, bool gamma_correct) {
    // The offset for interpolation is a periodic function with
    // a period length of 1 texel.
    // The input coordinate is shifted so that the center of the texel
    // aligns with the start of the period.
    // First, get the period and phase. floor() is used instead of modf()
    // because modf() truncates towards zero: for coordinates below the first
    // texel center it would yield a negative phase and the wrong base texel.
    const vec2 period = floor(tx_coord - 0.5);
    const vec2 phase = tx_coord - 0.5 - period;

    // The function starts at 0, then starts transitioning at
    // 0.5 - 0.5 / pixels_per_texel, then reaches 0.5 at 0.5,
    // Then reaches 1 at 0.5 + 0.5 / pixels_per_texel.
    // For sharpness values < 1.0, blend to bilinear filtering.
    const float bilinear_blend = min(1.0, sharpness);
    const vec2 offset =
        slopestep(bilinear_blend * (0.5 - 0.5 * tx_per_px),
                  1.0 - bilinear_blend * (1.0 - (0.5 + 0.5 * tx_per_px)),
                  phase, max(1.0, sharpness));

    // Without gamma-correct blending a single tap using the hardware's
    // bilinear interpolation is enough. The offset is shifted back to the
    // texel center before sampling.
    if (!gamma_correct) {
        return texture(tex, (period + 0.5 + offset) * tx_to_uv).rgb;
    }

    // With gamma-correct blending, do 4 taps at the texel centers and
    // interpolate manually in linear light.
    const vec3 top_left =
        to_lin(texture(tex, (period + 0.5) * tx_to_uv).rgb);
    const vec3 top_right =
        to_lin(texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).rgb);
    const vec3 bottom_left =
        to_lin(texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).rgb);
    const vec3 bottom_right =
        to_lin(texture(tex, (period + 1.5) * tx_to_uv).rgb);
    return to_srgb(mix(mix(top_left, top_right, offset.x),
                       mix(bottom_left, bottom_right, offset.x), offset.y));
}
// Function to get a pixel value, taking into consideration possible subpixel
// interpolation.
// Params:
//   tex:               Texture to sample
//   tx_per_px:         Size of one output pixel, measured in texels
//   tx_to_uv:          Factor converting texel coordinates to UV coordinates
//   tx_coord:          Coordinate in source pixel (texel) coordinates
//   sharpness:         Values < 1.0 blend towards bilinear filtering; values
//                      above 1.0 are passed to slopestep() as the slope
//   gamma_correct:     Blend neighbouring texels in linear light (4 taps)
//   sample_subpx:      Sample R, G and B at separate subpixel positions
//   subpx_orientation: Index of the subpixel layout direction
//   screen_rotation:   Screen rotation index, added to subpx_orientation
// Returns the filtered color; alpha is 1.0 on every path.
vec4 pixel_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord,
              float sharpness, bool gamma_correct, bool sample_subpx,
              uint subpx_orientation, uint screen_rotation) {
    // The offset for interpolation is a periodic function with
    // a period length of 1 texel.
    // The function starts at 0, then starts transitioning at
    // 0.5 - 0.5 / pixels_per_texel, then reaches 0.5 at 0.5,
    // Then reaches 1 at 0.5 + 0.5 / pixels_per_texel.
    // For sharpness values < 1.0, blend to bilinear filtering.
    // These bounds are identical for every tap, so compute them once.
    const float sharpness_upper = min(1.0, sharpness);
    const vec2 sharp_lb = sharpness_upper * (0.5 - 0.5 * tx_per_px);
    const vec2 sharp_ub =
        1.0 - sharpness_upper * (1.0 - (0.5 + 0.5 * tx_per_px));
    const float sharpness_lower = max(1.0, sharpness);

    if (sample_subpx) {
        // Subpixel sampling: Shift the sampling by 1/3rd of an output pixel
        // for each subpixel, assuming that the output size is at monitor
        // resolution.
        // Compensate for possible rotation of the screen in certain cores.
        // Indexing rot_corr at i and i + 3 (mod 4) yields the directions
        // (1, 0), (0, 1), (-1, 0), (0, -1) for i = 0..3.
        const vec4 rot_corr = vec4(1.0, 0.0, -1.0, 0.0);
        const vec2 sub_tx_offset =
            tx_per_px / 3.0 *
            vec2(rot_corr[(screen_rotation + subpx_orientation) % 4],
                 rot_corr[(screen_rotation + subpx_orientation + 3) % 4]);

        vec3 res;
        // For each channel: the input coordinate is shifted so that the
        // center of the texel aligns with the start of the period, then
        // split into period (base texel) and phase (position within it).
        vec2 period, phase, offset;
        if (gamma_correct) {
            // 4 taps per channel, interpolated manually in linear light.
            // Red
            period = floor(tx_coord - sub_tx_offset - 0.5);
            phase = tx_coord - sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.r = to_srgb(mix(
                mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).r),
                    to_lin(
                        texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).r),
                    offset.x),
                mix(to_lin(
                        texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).r),
                    to_lin(texture(tex, (period + 1.5) * tx_to_uv).r),
                    offset.x),
                offset.y));
            // Green
            period = floor(tx_coord - 0.5);
            phase = tx_coord - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.g = to_srgb(mix(
                mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).g),
                    to_lin(
                        texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).g),
                    offset.x),
                mix(to_lin(
                        texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).g),
                    to_lin(texture(tex, (period + 1.5) * tx_to_uv).g),
                    offset.x),
                offset.y));
            // Blue
            period = floor(tx_coord + sub_tx_offset - 0.5);
            phase = tx_coord + sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.b = to_srgb(mix(
                mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).b),
                    to_lin(
                        texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).b),
                    offset.x),
                mix(to_lin(
                        texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).b),
                    to_lin(texture(tex, (period + 1.5) * tx_to_uv).b),
                    offset.x),
                offset.y));
        } else {
            // One bilinear tap per channel.
            // Red
            period = floor(tx_coord - sub_tx_offset - 0.5);
            phase = tx_coord - sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.r = texture(tex, (period + 0.5 + offset) * tx_to_uv).r;
            // Green
            period = floor(tx_coord - 0.5);
            phase = tx_coord - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.g = texture(tex, (period + 0.5 + offset) * tx_to_uv).g;
            // Blue
            period = floor(tx_coord + sub_tx_offset - 0.5);
            phase = tx_coord + sub_tx_offset - 0.5 - period;
            offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
            res.b = texture(tex, (period + 0.5 + offset) * tx_to_uv).b;
        }
        return vec4(res, 1.0);
    } else {
        // The input coordinate is shifted so that the center of the texel
        // aligns with the start of the period.
        // First, get the period and phase.
        vec2 period = floor(tx_coord - 0.5);
        vec2 phase = tx_coord - 0.5 - period;
        vec2 offset = slopestep(sharp_lb, sharp_ub, phase, sharpness_lower);
        // With gamma correct blending, we have to do 4 taps and interpolate
        // manually. Without it, we can make use of a single tap using bilinear
        // interpolation. The offsets are shifted back to the texel center
        // before sampling.
        if (gamma_correct) {
            return vec4(
                to_srgb(mix(
                    mix(to_lin(texture(tex, (period + 0.5) * tx_to_uv).rgb),
                        to_lin(
                            texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv)
                                .rgb),
                        offset.x),
                    mix(to_lin(
                            texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv)
                                .rgb),
                        to_lin(texture(tex, (period + 1.5) * tx_to_uv).rgb),
                        offset.x),
                    offset.y)),
                1.0);
        } else {
            // Force alpha to 1.0 so that all code paths agree, instead of
            // leaking the texture's own alpha on this path only.
            return vec4(texture(tex, (period + 0.5 + offset) * tx_to_uv).rgb,
                        1.0);
        }
    }
}