mirror of
https://github.com/libretro/slang-shaders.git
synced 2024-11-26 18:10:33 +00:00
Add crt-royale-fast shaders (#619)
This commit is contained in:
parent
b327343b77
commit
111fcedc3b
93
crt/crt-royale-fast.slangp
Normal file
93
crt/crt-royale-fast.slangp
Normal file
@ -0,0 +1,93 @@
|
||||
# crt-royale-fast: a fast crt-royale adapted from original sources by Hyllian (2024).
|
||||
|
||||
shaders = "8"
|
||||
|
||||
textures = "mask_grille_texture_small;mask_slot_texture_small;mask_shadow_texture_small"
|
||||
mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64BGR.png"
|
||||
mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"
|
||||
mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"
|
||||
mask_grille_texture_small_wrap_mode = "repeat"
|
||||
mask_slot_texture_small_wrap_mode = "repeat"
|
||||
mask_shadow_texture_small_wrap_mode = "repeat"
|
||||
mask_grille_texture_small_linear = "true"
|
||||
mask_slot_texture_small_linear = "true"
|
||||
mask_shadow_texture_small_linear = "true"
|
||||
mask_grille_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
|
||||
mask_slot_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
|
||||
mask_shadow_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
|
||||
|
||||
# Pass0: Linearize the input based on CRT gamma and bob interlaced fields.
|
||||
# (Bobbing ensures we can immediately blur without getting artifacts.)
|
||||
shader0 = "shaders/crt-royale/src-fast/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang"
|
||||
alias0 = "ORIG_LINEARIZED"
|
||||
filter_linear0 = "false"
|
||||
scale_type0 = "source"
|
||||
scale0 = "1.0"
|
||||
srgb_framebuffer0 = "true"
|
||||
|
||||
|
||||
# Pass1: Resample interlaced scanlines vertically.
|
||||
# Separating vertical/horizontal scanline sampling is faster: It lets us
|
||||
# consider more scanlines while calculating weights for fewer pixels, and
|
||||
# it reduces our samples from vertical*horizontal to vertical+horizontal.
|
||||
# This has to come right after ORIG_LINEARIZED, because there's no
|
||||
# "original_source" scale_type we can use later.
|
||||
shader1 = "shaders/crt-royale/src-fast/crt-royale-scanlines-vertical-interlacing.slang"
|
||||
alias1 = "VERTICAL_SCANLINES"
|
||||
filter_linear1 = "true"
|
||||
scale_type_x1 = "source"
|
||||
scale_x1 = "1.0"
|
||||
scale_type_y1 = "viewport"
|
||||
scale_y1 = "1.0"
|
||||
srgb_framebuffer1 = "true"
|
||||
|
||||
# Pass2: Resize the phosphor mask vertically.
|
||||
shader2 = "shaders/crt-royale/src-fast/crt-royale-mask-resize-vertical.slang"
|
||||
filter_linear2 = "true"
|
||||
scale_type_x2 = "absolute"
|
||||
scale_x2 = "64"
|
||||
scale_type_y2 = "viewport"
|
||||
scale_y2 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size
|
||||
#srgb_framebuffer2 = "false" # mask_texture is already assumed linear
|
||||
|
||||
# Pass3: Resize the phosphor mask horizontally.  scale_x3 = scale_y2.
|
||||
shader3 = "shaders/crt-royale/src-fast/crt-royale-mask-resize-horizontal.slang"
|
||||
alias3 = "MASK_RESIZE"
|
||||
filter_linear3 = "false"
|
||||
scale_type_x3 = "viewport"
|
||||
scale_x3 = "0.0625"
|
||||
scale_type_y3 = "source"
|
||||
scale_y3 = "1.0"
|
||||
#srgb_framebuffer3 = "false" # mask_texture is already assumed linear
|
||||
|
||||
# Pass4: Resample scanlines horizontally, apply the phosphor mask.
|
||||
shader4 = "shaders/crt-royale/src-fast/crt-royale-scanlines-horizontal-apply-mask.slang"
|
||||
alias4 = "MASKED_SCANLINES"
|
||||
filter_linear4 = "true" # This could just as easily be nearest neighbor.
|
||||
scale_type4 = "viewport"
|
||||
scale4 = "1.0"
|
||||
srgb_framebuffer4 = "true"
|
||||
|
||||
# Pass5: Compute a brightpass. This will require reading the final mask.
|
||||
shader5 = "shaders/crt-royale/src-fast/crt-royale-brightpass.slang"
|
||||
alias5 = "BRIGHTPASS"
|
||||
filter_linear5 = "true" # This could just as easily be nearest neighbor.
|
||||
scale_type5 = "viewport"
|
||||
scale5 = "1.0"
|
||||
srgb_framebuffer5 = "true"
|
||||
|
||||
# Pass6: Blur the brightpass vertically
|
||||
shader6 = "shaders/crt-royale/src-fast/crt-royale-bloom-vertical.slang"
|
||||
filter_linear6 = "true" # This could just as easily be nearest neighbor.
|
||||
scale_type6 = "source"
|
||||
scale6 = "1.0"
|
||||
srgb_framebuffer6 = "true"
|
||||
|
||||
# Pass7: Blur the brightpass horizontally and combine it with the dimpass:
|
||||
shader7 = "shaders/crt-royale/src-fast/crt-royale-bloom-horizontal-reconstitute.slang"
|
||||
filter_linear7 = "true"
|
||||
scale_type7 = "source"
|
||||
scale7 = "1.0"
|
||||
srgb_framebuffer7 = "true"
|
||||
wrap_mode7 = "clamp_to_edge"
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 5.1 KiB |
158
crt/shaders/crt-royale/src-fast/bind-shader-params.h
Normal file
158
crt/shaders/crt-royale/src-fast/bind-shader-params.h
Normal file
@ -0,0 +1,158 @@
|
||||
#ifndef BIND_SHADER_PARAMS_H
|
||||
#define BIND_SHADER_PARAMS_H
|
||||
|
||||
/*
|
||||
crt-royale-fast: a fast crt-royale adapted from original sources by Hyllian (2024).
|
||||
|
||||
Aims to deliver a fast shader with crt-royale visual style by sacrificing some
|
||||
of its complex features.
|
||||
*/
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
// Uniform block shared by the crt-royale-fast passes.  Apart from MVP, every
// member backs the "#pragma parameter" of the same name declared below.
layout(std140, set = 0, binding = 0) uniform UBO
{
    // Matrix multiplied with the vertex position in the vertex stage.
    mat4 MVP;

    // Gamma and levels:
    float crt_gamma;
    float lcd_gamma;
    float levels_contrast;

    // Bloom:
    float bloom_underestimate_levels;
    float bloom_excess;

    // Scanline beam:
    float beam_min_sigma;
    float beam_max_sigma;
    float beam_spot_power;
    float beam_min_shape;
    float beam_max_shape;
    float beam_shape_power;
    float beam_horiz_filter;
    float beam_horiz_sigma;
    float beam_horiz_linear_rgb_weight;

    // Phosphor mask:
    float mask_type;
    float mask_triad_size_desired;

    // Geometry (no "#pragma parameter" for these two is declared in this
    // header):
    float geom_aspect_ratio_x;
    float geom_aspect_ratio_y;

    // Interlacing:
    float interlace_bff;
    float interlace_1080i;
    float interlace_detect_toggle;
} global;
|
||||
|
||||
|
||||
// Runtime parameters.  Where a "#define name global.name" alias follows a
// parameter, shader code reads it by its bare name.  Parameters without an
// alias must be read as global.<name>; do not add an alias for a name that is
// also accessed as global.<name>, because the macro would expand inside that
// expression as well.

// ---- Gamma / levels ----
#pragma parameter crt_gamma "Simulated CRT Gamma" 2.5 1.0 5.0 0.025
#define crt_gamma global.crt_gamma
#pragma parameter lcd_gamma "Your Display Gamma" 2.2 1.0 5.0 0.025
#define lcd_gamma global.lcd_gamma
#pragma parameter levels_contrast "Contrast" 1.0 0.0 4.0 0.015625
#define levels_contrast global.levels_contrast

// ---- Bloom ----
#pragma parameter bloom_underestimate_levels "Bloom - Underestimate Levels" 0.8 0.0 5.0 0.01
#define bloom_underestimate_levels global.bloom_underestimate_levels
// No alias: read as global.bloom_excess.
#pragma parameter bloom_excess "Bloom - Excess" 0.0 0.0 1.0 0.005

// ---- Scanline beam ----
#pragma parameter beam_min_sigma "Beam - Min Sigma" 0.02 0.005 1.0 0.005
#define beam_min_sigma global.beam_min_sigma
#pragma parameter beam_max_sigma "Beam - Max Sigma" 0.3 0.005 1.0 0.005
#define beam_max_sigma global.beam_max_sigma
#pragma parameter beam_spot_power "Beam - Spot Power" 0.33 0.01 16.0 0.01
#define beam_spot_power global.beam_spot_power
#pragma parameter beam_min_shape "Beam - Min Shape" 2.0 2.0 32.0 0.1
#define beam_min_shape global.beam_min_shape
#pragma parameter beam_max_shape "Beam - Max Shape" 4.0 2.0 32.0 0.1
#define beam_max_shape global.beam_max_shape
#pragma parameter beam_shape_power "Beam - Shape Power" 0.25 0.01 16.0 0.01
#define beam_shape_power global.beam_shape_power
#pragma parameter beam_horiz_filter "Beam - Horiz Filter" 0.0 0.0 2.0 1.0
#define beam_horiz_filter global.beam_horiz_filter
#pragma parameter beam_horiz_sigma "Beam - Horiz Sigma" 0.35 0.0 0.67 0.005
#define beam_horiz_sigma global.beam_horiz_sigma
// No alias: read as global.beam_horiz_linear_rgb_weight.
#pragma parameter beam_horiz_linear_rgb_weight "Beam - Horiz Linear RGB Weight" 1.0 0.0 1.0 0.01

// ---- Phosphor mask ----
#pragma parameter mask_type "Mask - Type" 0.0 0.0 2.0 1.0
#define mask_type global.mask_type
// No alias: read as global.mask_triad_size_desired.
#pragma parameter mask_triad_size_desired "Mask - Triad Size Desired" 3.0 1.0 18.0 0.125

// ---- Interlacing ----
#pragma parameter interlace_detect_toggle "Interlacing - Toggle" 1.0 0.0 1.0 1.0
bool interlace_detect = bool(global.interlace_detect_toggle);
#pragma parameter interlace_bff "Interlacing - Bottom Field First" 0.0 0.0 1.0 1.0
// Alias deliberately left disabled:
//#define interlace_bff global.interlace_bff
#pragma parameter interlace_1080i "Interlace - Detect 1080i" 0.0 0.0 1.0 1.0
#define interlace_1080i global.interlace_1080i
|
||||
|
||||
//  LEVELS MANAGEMENT:
float levels_autodim_temp       = 0.5;          // range (0, 1]

//  SCANLINE BEAM (static settings):
bool  beam_generalized_gaussian = true;
float beam_antialias_level      = 1.0;          // range [0, 2]
float beam_spot_shape_function  = 0.0;
float beam_spot_power_static    = 1.0/3.0;      // range (0, 16]
float beam_min_shape_static     = 2.0;          // range [2, 32]
float beam_max_shape_static     = 4.0;          // range [2, 32]

//  PHOSPHOR MASK:
float mask_sinc_lobes             = 3.0;        // range [2, 4]
float mask_min_allowed_triad_size = 2.0;

//  PASS SCALES AND RELATED CONSTANTS:
//  NOTE(review): 0.0625 is also the viewport scale the preset gives the two
//  mask-resize passes; presumably these must be kept in sync -- confirm.
vec2 mask_resize_viewport_scale = vec2(0.0625, 0.0625);

//  PHOSPHOR MASK TEXTURE CONSTANTS:
vec2  mask_texture_small_size = vec2(64.0, 64.0);
float mask_triads_per_tile    = 8.0;
float mask_grille_avg_color   = 53.0/255.0;
float mask_slot_avg_color     = 46.0/255.0;
float mask_shadow_avg_color   = 50.0/255.0;

//  Clamp a magnitude away from zero; the floor is 2^-16.
#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625)) // 2^-16

//  DERIVED MASK-RESIZE CONSTANTS (values in brackets follow from the
//  constants above):
float bloom_approx_filter      = 0.0;
vec2  mask_resize_src_lut_size = mask_texture_small_size;
float max_aa_base_pixel_border = 0.0;
float max_aniso_pixel_border   = max_aa_base_pixel_border + 0.5;    // [0.5]
float max_tiled_pixel_border   = max_aniso_pixel_border;            // [0.5]
float max_mask_texel_border    = ceil(max_tiled_pixel_border);      // [1.0]
float max_mask_tile_border     = max_mask_texel_border/
    (mask_min_allowed_triad_size * mask_triads_per_tile);           // [0.0625]
float mask_resize_num_tiles    = 1.0 + 2.0 * max_mask_tile_border;  // [1.125]
float mask_start_texels        = max_mask_texel_border;             // [1.0]
float mask_resize_num_triads   = mask_resize_num_tiles * mask_triads_per_tile; // [9.0]
vec2  min_allowed_viewport_triads = vec2(mask_resize_num_triads) / mask_resize_viewport_scale; // [144, 144]

//  {sigma, shape}_range are computed here, outside of scanline_contrib, so
//  they are evaluated once per pixel rather than once per contribution.  They
//  intentionally do not reuse vertex shader calculations, so that static
//  versions can be constant-folded.
float sigma_range = max(beam_max_sigma, beam_min_sigma) - beam_min_sigma;
float shape_range = max(beam_max_shape, beam_min_shape) - beam_min_shape;

////////////////////////  COMMON MATHEMATICAL CONSTANTS  ///////////////////////

float pi         = 3.141592653589;
float under_half = 0.4995;
|
||||
|
||||
//  Accessors for settings that still need "cooking":
float get_mask_amplify()
{
    //  Returns:    The reciprocal of the average color of the mask texture
    //              selected by mask_type:
    //                  mask_type < 0.5  ->  aperture grille
    //                  mask_type < 1.5  ->  slot mask
    //                  otherwise        ->  shadow mask
    if (mask_type < 0.5)
    {
        return 1.0/mask_grille_avg_color;
    }
    if (mask_type < 1.5)
    {
        return 1.0/mask_slot_avg_color;
    }
    return 1.0/mask_shadow_avg_color;
}
|
||||
|
||||
#endif // BIND_SHADER_PARAMS_H
|
133
crt/shaders/crt-royale/src-fast/bloom-functions.h
Normal file
133
crt/shaders/crt-royale/src-fast/bloom-functions.h
Normal file
@ -0,0 +1,133 @@
|
||||
#ifndef BLOOM_FUNCTIONS_H
|
||||
#define BLOOM_FUNCTIONS_H
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
///////////////////////////////// DESCRIPTION ////////////////////////////////
|
||||
|
||||
// These utility functions and constants help several passes determine the
|
||||
// size and center texel weight of the phosphor bloom in a uniform manner.
|
||||
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "blur-functions.h"
|
||||
|
||||
/////////////////////////////// BLOOM CONSTANTS //////////////////////////////
|
||||
|
||||
// Compute constants with manual inlines of the functions below:
|
||||
float bloom_diff_thresh = 1.0/256.0;
|
||||
|
||||
/////////////////////////////////// HELPERS //////////////////////////////////
|
||||
|
||||
float get_min_sigma_to_blur_triad(float triad_size, float thresh)
{
    //  Requires:   1.) triad_size is the final phosphor triad size in pixels.
    //              2.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum sigma that will fully blur a phosphor triad on
    //              the screen to an even color, within thresh.
    //  The closed form below was found by curve-fitting data
    //  (max error = ~0.086036, mean sq. error = ~0.0013387).  A simpler fit,
    //  0.5985*triad_size - triad_size*sqrt(thresh), has a larger error
    //  (max error = ~0.16486, mean sq. error = ~0.0041041) and is not used.
    float linear_term = 0.6113*triad_size;
    float thresh_term = 1.122*triad_size*sqrt(0.000416 + thresh);

    return -0.05168 + linear_term - thresh_term;
}
|
||||
|
||||
float get_absolute_scale_blur_sigma(float thresh)
{
    //  Requires:   1.) min_allowed_viewport_triads must be a global vec2.  The
    //                  number of horizontal phosphor triads in the final image
    //                  must be >= min_allowed_viewport_triads.x for realistic
    //                  results.
    //              2.) bloom_approx_scale_x must be a global float equal to the
    //                  absolute horizontal scale of BLOOM_APPROX.
    //              3.) bloom_approx_scale_x/min_allowed_viewport_triads.x
    //                  should be <= 1.1658025090 to keep the final result <
    //                  0.62666015625 (the largest sigma ensuring the largest
    //                  unused texel weight stays < 1.0/256.0 for a 3x3 blur).
    //              4.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum Gaussian sigma that will blur the pass output
    //              as much as it would have taken to blur away
    //              bloom_approx_scale_x horizontal phosphor triads.
    //  Description:
    //  BLOOM_APPROX should look like a downscaled phosphor blur.  Ideally, we'd
    //  use the same blur sigma as the actual phosphor bloom and scale it down
    //  to the current resolution with (bloom_approx_scale_x/viewport_size_x),
    //  but we don't know the viewport size in this pass.  Instead, we blur as
    //  much as it would take to blur away min_allowed_viewport_triads.x.  This
    //  blurs "more than necessary" if the user actually uses more triads, but
    //  that's not terrible either, because blurring a constant fraction of the
    //  viewport may better resemble a true optical bloom anyway (since the
    //  viewport will generally be about the same fraction of each player's
    //  field of view, regardless of screen size and resolution).

    //  Assume an extremely large viewport size for asymptotic results.  The
    //  constant is declared locally, with the same value
    //  get_bloom_approx_sigma() uses, because this header has no file-scope
    //  max_viewport_size_x and the name would otherwise be undeclared here.
    float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);

    float min_sigma = get_min_sigma_to_blur_triad(max_viewport_size_x/min_allowed_viewport_triads.x, thresh);

    return bloom_approx_scale_x/max_viewport_size_x * min_sigma;
}
|
||||
|
||||
float get_center_weight(float sigma)
{
    //  Requires:   sigma is a Gaussian blur standard deviation.
    //  Returns:    The blur weight for the center texel.  This simply forwards
    //              to get_fast_gaussian_weight_sum_inv().
    float center_weight = get_fast_gaussian_weight_sum_inv(sigma);

    return center_weight;
}
|
||||
|
||||
|
||||
float get_bloom_approx_sigma(float output_size_x_runtime, float estimated_viewport_size_x)
{
    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
    //              2.) bloom_approx_size_x, mask_num_triads_desired_static and
    //                  beam_max_sigma_static must be declared by the including
    //                  file; they are not declared in this header.
    //  Returns:    A static small bloom sigma that works well for typical
    //              cases.  Neither parameter is read by this implementation:
    //              the result depends only on static settings.
    //  Description:
    //  We're either using blur3x3 or bilinear filtering.  The biggest reason
    //  to choose blur3x3 is to avoid dynamic weights, so the calculation is
    //  static.  The default triad count is a compromise that ensures typical
    //  triads are blurred, even if unusually large ones aren't.

    //  Assume an extremely large viewport size for asymptotic results:
    float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);

    //  Triad size at that viewport width, using the static triad count (never
    //  fewer than min_allowed_viewport_triads.x):
    float triad_count = max(min_allowed_viewport_triads.x, mask_num_triads_desired_static);
    float triad_size_at_max_viewport = max_viewport_size_x/triad_count;

    //  Sigma needed at that viewport width, rescaled to the static
    //  BLOOM_APPROX output width:
    float sigma_at_max_viewport = get_min_sigma_to_blur_triad(triad_size_at_max_viewport, bloom_diff_thresh);
    float static_output_size_x = bloom_approx_size_x;
    float rescaled_sigma = sigma_at_max_viewport * static_output_size_x/max_viewport_size_x;

    //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
    //  try accounting for the Gaussian scanline sigma from the last pass too,
    //  using the static default value:
    return length(vec2(rescaled_sigma, beam_max_sigma_static));
}
|
||||
|
||||
#endif // BLOOM_FUNCTIONS_H
|
||||
|
148
crt/shaders/crt-royale/src-fast/blur-functions.h
Normal file
148
crt/shaders/crt-royale/src-fast/blur-functions.h
Normal file
@ -0,0 +1,148 @@
|
||||
#ifndef BLUR_FUNCTIONS_H
|
||||
#define BLUR_FUNCTIONS_H
|
||||
|
||||
///////////////////////////////// MIT LICENSE ////////////////////////////////
|
||||
|
||||
// Copyright (C) 2014 TroggleMonkey
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal in the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
// IN THE SOFTWARE.
|
||||
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
//  Static standard deviations for the blurN() family, in units of dxdy
//  strides.  Users may override them with their own constants (even non-static
//  uniforms, if they accept the speed hit).
//  Each default is the largest value that keeps the largest unused blur term
//  on each side <= 1.0/256.0.  (We could get away with more or be more
//  conservative, but this compromise is pretty reasonable.)
float blur3_std_dev  = 0.62666015625;
float blur4_std_dev  = 0.66171875;
float blur5_std_dev  = 0.9845703125;
float blur6_std_dev  = 1.02626953125;
float blur7_std_dev  = 1.36103515625;
float blur8_std_dev  = 1.4080078125;
float blur9_std_dev  = 1.7533203125;
float blur10_std_dev = 1.80478515625;
float blur11_std_dev = 2.15986328125;
float blur12_std_dev = 2.215234375;
float blur17_std_dev = 3.45535583496;
float blur25_std_dev = 5.3409576416;
float blur31_std_dev = 6.86488037109;
float blur43_std_dev = 10.1852050781;

//  error_blurring should be in [0.0, 1.0].  Higher values reduce ringing in
//  shared-sample blurs but increase blurring and feature shifting.
float error_blurring = 0.5;
|
||||
|
||||
/////////////////////////////////// HELPERS //////////////////////////////////
|
||||
|
||||
|
||||
vec4 uv2_to_uv4(vec2 tex_uv)
{
    //  Returns:    tex_uv widened to a vec4 with zeroed z and w, so a vec2 uv
    //              offset is safe for adding to vec4 tex2Dlod coords.
    return vec4(tex_uv.x, tex_uv.y, 0.0, 0.0);
}
|
||||
|
||||
//  Squared-length helper macro (usable with static constants).  The argument
//  is evaluated twice.
#define LENGTH_SQ(vec) (dot((vec), (vec)))
|
||||
|
||||
float get_fast_gaussian_weight_sum_inv(float sigma)
{
    //  Requires:   sigma is a Gaussian standard deviation.  It must not be 0.0
    //              or exactly 0.0860587260734721 (both are divisors below).
    //  Returns:    An estimate of the normalized center-texel weight.  Since
    //              the unnormalized center weight is 1.0, this is also the
    //              inverse of the weight sum.
    //  Description:
    //  The Gaussian integral gives the asymptotic center weight, which is
    //  close for large enough blurs (9+):
    //      center_weight = 0.5 *
    //          (erf(0.5/(sigma*sqrt(2.0))) - erf(-0.5/(sigma*sqrt(2.0))))
    //  and erf(-x) == -erf(x), so that is erf((0.5/sqrt(2.0))/sigma).
    //  Curve-fitting is faster still, and closer than the asymptotic result:
    //  the fit was built from 64 blur sizes in [3, 131) and 255 equally-spaced
    //  sigmas in (0, blurN_std_dev), so results for smaller sigmas are biased
    //  toward smaller blurs.  The max error is 0.0031793913.
    //  Relative FPS: 134.3 with erf, 135.8 with curve-fitting.
    float double_exp_fit = exp(exp(0.348348412457428/
        (sigma - 0.0860587260734721)));
    float inverse_sigma_fit = 0.399334576340352/sigma;

    //  The smaller of the two fitted curves is the estimate:
    return min(double_exp_fit, inverse_sigma_fit);
}
|
||||
|
||||
|
||||
|
||||
vec3 tex2Dblur9resize(sampler2D tex, vec2 tex_uv,
    vec2 dxdy, float sigma)
{
    //  Requires:   dxdy is the uv stride between adjacent taps; sigma is the
    //              Gaussian standard deviation in units of that stride.
    //  Returns:    A 1D 9x Gaussian blurred texture lookup using a 9-tap blur:
    //              one sample at each of tex_uv + k*dxdy for k in [-4, 4].
    //              It may be mipmapped depending on settings and dxdy.

    //  Unnormalized weights exp(-k^2/(2*sigma^2)) for k = 0..4, plus the sum
    //  of the four one-sided weights:
    float denom_inv = 0.5/(sigma*sigma);
    float tap_weight[5];
    tap_weight[0] = 1.0;
    float one_side_sum = 0.0;
    for (int k = 1; k <= 4; ++k)
    {
        tap_weight[k] = exp(-float(k*k) * denom_inv);
        one_side_sum += tap_weight[k];
    }
    float weight_sum_inv = 1.0 / (tap_weight[0] + 2.0 * one_side_sum);

    //  Accumulate the weighted taps from -4 to +4, then normalize:
    vec3 acc = vec3(0.0, 0.0, 0.0);
    for (int k = -4; k <= 4; ++k)
    {
        acc += tap_weight[abs(k)] * texture(tex, tex_uv + float(k) * dxdy).rgb;
    }

    return acc * weight_sum_inv;
}
|
||||
|
||||
vec3 tex2Dblur17fastest(sampler2D tex, vec2 tex_uv,
    vec2 dxdy, float weight_sum_inv, vec4 w1_8, vec4 w1_8_ratio)
{
    //  Requires:   1.) w1_8 holds the four combined tap weights, nearest tap
    //                  pair first (x) to farthest (w).
    //              2.) w1_8_ratio holds, per tap pair, the fractional offset
    //                  added to the base distances 1, 3, 5 and 7 strides.
    //              3.) weight_sum_inv normalizes the weighted sum.
    //  Returns:    A 1D 17x Gaussian blurred texture lookup using 1 center
    //              tap and 8 side taps (4 per side).  It may be mipmapped
    //              depending on settings and dxdy.
    //  NOTE(review): this function adds 1/3/5/7 itself, so ratios that were
    //  already offset (as get_blur_fastest_w1_8() in the bloom pass produces)
    //  would be shifted twice -- confirm what callers pass in.
    vec4 tap_distance = vec4(1.0, 3.0, 5.0, 7.0) + w1_8_ratio;

    vec3 acc = vec3(0.0, 0.0, 0.0);

    //  Negative side, farthest tap first:
    for (int i = 3; i >= 0; --i)
    {
        acc += (w1_8[i] * texture(tex, tex_uv - tap_distance[i] * dxdy).rgb);
    }

    //  Center tap (unnormalized weight 1.0):
    acc += (1.0 * texture(tex, tex_uv).rgb);

    //  Positive side, nearest tap first:
    for (int i = 0; i <= 3; ++i)
    {
        acc += (w1_8[i] * texture(tex, tex_uv + tap_distance[i] * dxdy).rgb);
    }

    return acc * weight_sum_inv;
}
|
||||
|
||||
|
||||
#endif // BLUR_FUNCTIONS_H
|
||||
|
@ -0,0 +1,206 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
// Per-pass size uniforms.  The vertex stage of this pass reads OutputSize.xy
// and SourceSize.z.
// NOTE(review): the vec4 layout (xy = size, zw = 1/size) and the automatic
// filling of the <ALIAS>Size members are slang conventions not visible in this
// file -- confirm against the slang shader specification.
layout(push_constant) uniform Push
{
    vec4 SourceSize;
    vec4 OriginalSize;
    vec4 OutputSize;
    vec4 MASKED_SCANLINESSize;  // for the pass aliased MASKED_SCANLINES
    vec4 BRIGHTPASSSize;        // for the pass aliased BRIGHTPASS
} params;
|
||||
|
||||
// Legacy-style names for the aliased pass textures and their sizes.  For each
// alias, both *texture_size and *video_size map to the same .xy of the
// matching push-constant size vector.
#define MASKED_SCANLINEStexture         MASKED_SCANLINES
#define MASKED_SCANLINEStexture_size    params.MASKED_SCANLINESSize.xy
#define MASKED_SCANLINESvideo_size      params.MASKED_SCANLINESSize.xy

#define BRIGHTPASStexture               BRIGHTPASS
#define BRIGHTPASStexture_size          params.BRIGHTPASSSize.xy
#define BRIGHTPASSvideo_size            params.BRIGHTPASSSize.xy
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
|
||||
/////////////////////////////// VERTEX INCLUDES //////////////////////////////
|
||||
|
||||
#include "scanline-functions.h"
|
||||
|
||||
// Raise a linear vec3 color to 1/lcd_gamma (per channel).
#define GAMMA_OUT(color) pow((color), vec3(1.0 / lcd_gamma))

// Pass-local copy of the bloom threshold (max desired pixel difference when
// blurring a triad); the trailing underscore keeps it distinct from the
// bloom_diff_thresh declared in bloom-functions.h.
float bloom_diff_thresh_ = 1.0/256.0;

// Smallest allowed mask tile size in pixels: the minimum triad size times the
// triads per tile, rounded up.
float mask_min_allowed_tile_size = ceil(mask_min_allowed_triad_size * mask_triads_per_tile);
|
||||
|
||||
// Result of get_blur_fastest_w1_8(): combined weights and sampling offsets
// for the four tap pairs on one side of a 17-texel blur.
struct st_gauss
{
    vec4 w1_8;          // (w1+w2, w3+w4, w5+w6, w7+w8)
    vec4 w1_8_ratio;    // per pair: far weight / pair weight + (1, 3, 5, 7)
};
|
||||
|
||||
#pragma stage vertex

// Vertex attributes:
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;

// Outputs written by main() below:
layout(location = 0) out vec2 video_uv;                     // TexCoord, passed through
layout(location = 1) out float bloom_sigma_runtime;         // sigma for the current triad size
layout(location = 2) out vec4 w1_8;                         // combined tap-pair weights
layout(location = 3) out vec4 w1_8_ratio;                   // tap-pair offsets * SourceSize.z
layout(location = 4) out float weight_sum_inv;              // blur normalization factor
layout(location = 5) out float undim_mask_contrast_factors; // undim * mask amplify * contrast
|
||||
|
||||
//  Copied from bloom-functions.h.
//  Requires:   triad_size is the phosphor triad size in pixels; thresh is the
//              max desired pixel difference in the blurred triad.
//  Returns:    The curve-fitted minimum sigma that blurs such a triad to an
//              even color, within thresh.
float get_min_sigma_to_blur_triad(float triad_size, float thresh)
{
    float linear_term = 0.6113*triad_size;
    float thresh_term = 1.122*triad_size*sqrt(0.000416 + thresh);

    return -0.05168 + linear_term - thresh_term;
}
|
||||
|
||||
//  Requires:   sigma is a Gaussian standard deviation.  It must not be 0.0 or
//              exactly 0.0860587260734721 (both are divisors below).
//  Returns:    The smaller of two fitted curves, used by main() as the blur
//              normalization factor (weight_sum_inv).
float get_fast_gaussian_weight_sum_inv(float sigma)
{
    float double_exp_fit = exp(exp(0.348348412457428/(sigma - 0.0860587260734721)));
    float inverse_sigma_fit = 0.399334576340352/sigma;

    return min(double_exp_fit, inverse_sigma_fit);
}
|
||||
|
||||
//  Requires:   sigma is the Gaussian standard deviation in texels (nonzero).
//  Returns:    For the eight one-sided taps at distances 1..8, with
//              unnormalized weights w_k = exp(-k^2/(2*sigma^2)):
//                  w1_8       = (w1+w2, w3+w4, w5+w6, w7+w8)
//                  w1_8_ratio = (w2, w4, w6, w8)/w1_8 + (1, 3, 5, 7)
//              so each w1_8_ratio component is the distance, between the two
//              taps of its pair, at which one sample stands in for both.
st_gauss get_blur_fastest_w1_8(float sigma)
{
    float denom_inv = 0.5/(sigma*sigma);

    //  Odd-distance taps (w1, w3, w5, w7) and even-distance taps
    //  (w2, w4, w6, w8), evaluated component-wise:
    vec4 near_weights = exp(-vec4(1.0, 9.0, 25.0, 49.0) * denom_inv);
    vec4 far_weights  = exp(-vec4(4.0, 16.0, 36.0, 64.0) * denom_inv);

    st_gauss blur_weights;
    blur_weights.w1_8       = near_weights + far_weights;
    blur_weights.w1_8_ratio = far_weights/(blur_weights.w1_8) + vec4(1.0, 3.0, 5.0, 7.0);

    return blur_weights;
}
|
||||
|
||||
vec2 get_resized_mask_tile_size(vec2 estimated_viewport_size, vec2 estimated_mask_resize_output_size, bool solemnly_swear_same_inputs_for_every_pass)
{
    //  Requires:   1.) estimated_mask_resize_output_size is the (estimated)
    //                  output size of the mask-resize pass, in pixels.
    //              2.) solemnly_swear_same_inputs_for_every_pass selects
    //                  whether the y size may also be limited by the clamped
    //                  x size (true) or only by its own clamp (false).
    //              estimated_viewport_size is not read by this implementation.
    //  Returns:    The integer (floored) size in pixels of one resized mask
    //              tile.

    //  Stated tile properties must be correct:
    float inv_aspect = mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
    float aspect = 1.0/inv_aspect;
    vec2 aspect_scale = vec2(1.0, inv_aspect);

    //  Desired width from the user's triad size, capped at the source LUT
    //  width so we're never upsizing:
    float wanted_size_x = min(mask_triads_per_tile * global.mask_triad_size_desired, mask_resize_src_lut_size.x);

    //  Enforce the min and max tile size in both dimensions:
    vec2 lower_bound = mask_min_allowed_tile_size * aspect_scale;
    vec2 upper_bound = estimated_mask_resize_output_size / mask_resize_num_tiles;
    vec2 clamped = clamp(wanted_size_x * aspect_scale, lower_bound, upper_bound);

    //  Re-derive each dimension from the other and keep the smaller one.  With
    //  the flag false, y is compared against itself and so is left unchanged:
    float x_from_y = clamped.y * aspect;
    float y_from_x = mix(clamped.y, clamped.x * inv_aspect, float(solemnly_swear_same_inputs_for_every_pass));
    vec2 reclamped = min(clamped, vec2(x_from_y, y_from_x));

    //  We need integer tile sizes in both directions for tiled sampling to
    //  work correctly.  Use floor (to make sure we don't round up), but add
    //  the tiny FIX_ZERO(0.0) epsilon first, to avoid a rounding bug where
    //  floor decreases whole numbers:
    return floor(reclamped + vec2(FIX_ZERO(0.0)));
}
|
||||
|
||||
// Vertex stage: passes the uv through and precomputes the per-draw blur
// parameters (sigma, paired tap weights/offsets, weight normalization) and
// the combined undim/mask-amplify/contrast factor for the fragment stage.
void main()
{
    gl_Position = global.MVP * Position;
    video_uv = TexCoord;

    // Runtime bloom sigma, derived from the resized mask tile width.
    vec2 resize_output_estimate = params.OutputSize.xy * mask_resize_viewport_scale;
    vec2 tile_size = get_resized_mask_tile_size(params.OutputSize.xy, resize_output_estimate, false);
    float triad_size = tile_size.x / mask_triads_per_tile;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(triad_size, bloom_diff_thresh_);

    // Paired tap weights, and tap offsets scaled by SourceSize.z
    // (presumably 1/source width -- confirm against the slang conventions).
    st_gauss taps = get_blur_fastest_w1_8(bloom_sigma_runtime);
    w1_8 = taps.w1_8;
    w1_8_ratio = taps.w1_8_ratio * params.SourceSize.z;

    weight_sum_inv = get_fast_gaussian_weight_sum_inv(bloom_sigma_runtime);

    float undim_factor = 1.0/levels_autodim_temp;
    undim_mask_contrast_factors = undim_factor * get_mask_amplify() * levels_contrast;
}
|
||||
|
||||
|
||||
|
||||
#pragma stage fragment
|
||||
layout(location = 0) in vec2 video_uv;
|
||||
layout(location = 1) in float bloom_sigma_runtime;
|
||||
layout(location = 2) in vec4 w1_8;
|
||||
layout(location = 3) in vec4 w1_8_ratio;
|
||||
layout(location = 4) in float weight_sum_inv;
|
||||
layout(location = 5) in float undim_mask_contrast_factors;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
layout(set = 0, binding = 3) uniform sampler2D BRIGHTPASS;
|
||||
layout(set = 0, binding = 4) uniform sampler2D MASKED_SCANLINES;
|
||||
#define bloom_texture Source
|
||||
|
||||
////////////////////////////// FRAGMENT INCLUDES //////////////////////////////
|
||||
|
||||
// Horizontal 17-tap blur done with 9 fetches: one center sample (weight 1.0)
// plus four paired taps on each side. w1_8 holds the pair weights and
// w1_8_ratio the pair offsets along x, already in uv units.
// The sum is normalized by weight_sum_inv.
vec3 tex2Dblur17fastest(sampler2D tex, vec2 tex_uv, float weight_sum_inv, vec4 w1_8, vec4 w1_8_ratio)
{
    vec3 acc = vec3(0.0);

    // Negative-x side, outermost pair first.
    for (int i = 3; i >= 0; --i)
    {
        acc += w1_8[i] * texture(tex, tex_uv - vec2(w1_8_ratio[i], 0.0)).rgb;
    }

    // Center tap.
    acc += texture(tex, tex_uv).rgb;

    // Positive-x side, innermost pair first.
    for (int i = 0; i < 4; ++i)
    {
        acc += w1_8[i] * texture(tex, tex_uv + vec2(w1_8_ratio[i], 0.0)).rgb;
    }

    return acc * weight_sum_inv;
}
|
||||
|
||||
|
||||
// Fragment stage: horizontally blurs the incoming bloom texture, then
// reconstitutes the image as (masked scanlines - brightpass + blurred
// brightpass), scaled by the undim/mask/contrast factor and gamma-encoded.
void main()
{
    // Masked scanlines and the unblurred brightpass at this pixel.
    vec3 dim_scanlines = texture(MASKED_SCANLINEStexture, video_uv).rgb;
    vec3 sharp_brightpass = texture(BRIGHTPASStexture, video_uv).rgb;

    // Horizontally blurred brightpass.
    vec3 bloomed_brightpass = tex2Dblur17fastest(bloom_texture, video_uv, weight_sum_inv, w1_8, w1_8_ratio);

    // Replace the sharp brightpass with its blurred version, then undim
    // (scanline auto-dim) and amplify (mask dim) the result.
    vec3 phosphor_bloom = (dim_scanlines - sharp_brightpass + bloomed_brightpass) * undim_mask_contrast_factors;

    FragColor = vec4(GAMMA_OUT(phosphor_bloom), 1.0);
}
|
204
crt/shaders/crt-royale/src-fast/crt-royale-bloom-vertical.slang
Normal file
204
crt/shaders/crt-royale/src-fast/crt-royale-bloom-vertical.slang
Normal file
@ -0,0 +1,204 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
layout(push_constant) uniform Push
|
||||
{
|
||||
vec4 SourceSize;
|
||||
vec4 OriginalSize;
|
||||
vec4 OutputSize;
|
||||
uint FrameCount;
|
||||
vec4 MASKED_SCANLINESSize;
|
||||
} params;
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
|
||||
/////////////////////////////// VERTEX INCLUDES ///////////////////////////////
|
||||
|
||||
float bloom_diff_thresh_ = 1.0/256.0;
|
||||
float mask_min_allowed_tile_size = ceil(mask_min_allowed_triad_size * mask_triads_per_tile);
|
||||
|
||||
// Paired side-tap data for the 17-tap Gaussian blur.
struct st_gauss
{
    vec4 w1_8;        // combined weights of tap pairs (1,2), (3,4), (5,6), (7,8)
    vec4 w1_8_ratio;  // sample offset of each pair from the center tap
};
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 tex_uv;
|
||||
layout(location = 1) out float bloom_sigma_runtime;
|
||||
layout(location = 2) out vec4 w1_8;
|
||||
layout(location = 3) out vec4 w1_8_ratio;
|
||||
layout(location = 4) out float weight_sum_inv;
|
||||
|
||||
// copied from bloom-functions.h
|
||||
// Requires:   1.) triad_size is the final phosphor triad size in pixels.
//             2.) thresh is the max desired pixel difference in the blurred
//                 triad (e.g. 1.0/256.0).
// Returns:    The minimum sigma that fully blurs a phosphor triad on the
//             screen to an even color, within thresh.
// The closed form was found by curve-fitting data
// (max error ~0.086036, mean sq. error ~0.0013387). A cheaper, less accurate
// fit, 0.5985*triad_size - triad_size*sqrt(thresh), has max error ~0.16486
// and mean sq. error ~0.0041041.
float get_min_sigma_to_blur_triad(float triad_size,
                                  float thresh)
{
    float root_term = sqrt(0.000416 + thresh);
    return -0.05168 + 0.6113*triad_size - 1.122*triad_size*root_term;
}
|
||||
|
||||
// Returns an estimate of 1/(sum of unnormalized Gaussian weights) for sigma.
// Because the unnormalized center weight is 1.0, this is also the normalized
// center-pixel weight. The exact asymptotic value comes from the Gaussian
// integral, erf(0.5/(sigma*sqrt(2.0))), which is close for large blurs (9+).
// The curve fit used here is faster and closer: it was built from 64 blur
// sizes in [3, 131) and 255 equally spaced sigmas in (0, blurN_std_dev), so
// results for smaller sigmas are biased toward smaller blurs.
// Max error is 0.0031793913. Relative FPS: 134.3 with erf, 135.8 with the fit.
float get_fast_gaussian_weight_sum_inv(float sigma)
{
    float fitted_term = exp(exp(0.348348412457428/(sigma - 0.0860587260734721)));
    float inverse_sigma_term = 0.399334576340352/sigma;
    return min(fitted_term, inverse_sigma_term);
}
|
||||
|
||||
// Builds the side-tap data for the 17-tap blur from unnormalized Gaussian
// weights w_n = exp(-n*n / (2*sigma*sigma)), n = 1..8 (the center weight is 1).
// Taps are merged in pairs (1,2), (3,4), (5,6), (7,8):
//   w1_8       = w_odd + w_even                  (combined pair weight)
//   w1_8_ratio = odd_index + w_even / pair_sum   (weighted mean tap position)
st_gauss get_blur_fastest_w1_8(float sigma)
{
    float half_inv_var = 0.5/(sigma*sigma);

    vec4 odd_taps = vec4(exp(-1.0 * half_inv_var),
                         exp(-9.0 * half_inv_var),
                         exp(-25.0 * half_inv_var),
                         exp(-49.0 * half_inv_var));
    vec4 even_taps = vec4(exp(-4.0 * half_inv_var),
                          exp(-16.0 * half_inv_var),
                          exp(-36.0 * half_inv_var),
                          exp(-64.0 * half_inv_var));

    st_gauss pairs;
    pairs.w1_8 = odd_taps + even_taps;
    pairs.w1_8_ratio = even_taps/pairs.w1_8 + vec4(1.0, 3.0, 5.0, 7.0);
    return pairs;
}
|
||||
|
||||
// Returns the integer (floored) size of one resized phosphor-mask tile.
// estimated_viewport_size is accepted for interface compatibility but is not
// read here. When solemnly_swear_same_inputs_for_every_pass is true, the y
// size is additionally limited by the clamped x size to keep the tile aspect.
vec2 get_resized_mask_tile_size(vec2 estimated_viewport_size, vec2 estimated_mask_resize_output_size, bool solemnly_swear_same_inputs_for_every_pass)
{
    // Aspect ratio of the source mask tile (stated tile properties must be correct).
    float inv_aspect = mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
    float aspect = 1.0/inv_aspect;
    vec2 aspect_xy = vec2(1.0, inv_aspect);

    // Desired width, capped at the source LUT width so we never upsize.
    float wanted_x = mask_triads_per_tile * global.mask_triad_size_desired;
    float capped_x = min(wanted_x, mask_resize_src_lut_size.x);

    // Enforce the minimum and maximum tile size in both dimensions.
    vec2 lower_bound = mask_min_allowed_tile_size * aspect_xy;
    vec2 upper_bound = estimated_mask_resize_output_size / mask_resize_num_tiles;
    vec2 bounded = clamp(capped_x * aspect_xy, lower_bound, upper_bound);

    // Re-limit each axis by the size implied by the other axis.
    float x_from_y = bounded.y * aspect;
    float y_from_x = mix(bounded.y, bounded.x * inv_aspect, float(solemnly_swear_same_inputs_for_every_pass));
    vec2 refit = min(bounded, vec2(x_from_y, y_from_x));

    // Tiled sampling needs integer sizes. Floor so we never round up; the
    // FIX_ZERO offset guards against floor decreasing exact whole numbers.
    return floor(refit + vec2(FIX_ZERO(0.0)));
}
|
||||
|
||||
// Vertex stage of the vertical bloom pass: emits the (slightly scaled) uv and
// precomputes sigma, paired tap weights/offsets and the weight normalization.
void main()
{
    gl_Position = global.MVP * Position;
    tex_uv = TexCoord * 1.0001;

    // uv sample distance between output pixels, computed like
    // blurs/shaders/vertex-shader-blur-fast-vertical.h.
    vec2 source_per_output = params.SourceSize.xy/params.OutputSize.xy;
    vec2 uv_per_output_pixel = source_per_output/params.SourceSize.xy;

    // Runtime bloom sigma, derived from the resized mask tile width.
    vec2 resize_output_estimate = params.OutputSize.xy * mask_resize_viewport_scale;
    vec2 tile_size = get_resized_mask_tile_size(params.OutputSize.xy, resize_output_estimate, false);
    float triad_size = tile_size.x / mask_triads_per_tile;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(triad_size, bloom_diff_thresh_);

    // Paired tap weights, and tap offsets converted to vertical uv distances.
    st_gauss taps = get_blur_fastest_w1_8(bloom_sigma_runtime);
    w1_8 = taps.w1_8;
    w1_8_ratio = taps.w1_8_ratio * uv_per_output_pixel.y;

    weight_sum_inv = get_fast_gaussian_weight_sum_inv(bloom_sigma_runtime);
}
|
||||
|
||||
#pragma stage fragment
|
||||
#pragma format R8G8B8A8_SRGB
|
||||
layout(location = 0) in vec2 tex_uv;
|
||||
layout(location = 1) in float bloom_sigma_runtime;
|
||||
layout(location = 2) in vec4 w1_8;
|
||||
layout(location = 3) in vec4 w1_8_ratio;
|
||||
layout(location = 4) in float weight_sum_inv;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
#define input_texture Source
|
||||
|
||||
////////////////////////////// FRAGMENT INCLUDES //////////////////////////////
|
||||
|
||||
// Vertical 17-tap blur done with 9 fetches: one center sample (weight 1.0)
// plus four paired taps on each side. w1_8 holds the pair weights and
// w1_8_ratio the pair offsets along y, already in uv units.
// The sum is normalized by weight_sum_inv.
vec3 tex2Dblur17fastest(sampler2D tex, vec2 tex_uv, float weight_sum_inv, vec4 w1_8, vec4 w1_8_ratio)
{
    vec3 acc = vec3(0.0);

    // Negative-y side, outermost pair first.
    for (int i = 3; i >= 0; --i)
    {
        acc += w1_8[i] * texture(tex, tex_uv - vec2(0.0, w1_8_ratio[i])).rgb;
    }

    // Center tap.
    acc += texture(tex, tex_uv).rgb;

    // Positive-y side, innermost pair first.
    for (int i = 0; i < 4; ++i)
    {
        acc += w1_8[i] * texture(tex, tex_uv + vec2(0.0, w1_8_ratio[i])).rgb;
    }

    return acc * weight_sum_inv;
}
|
||||
|
||||
// Fragment stage: writes the vertically blurred Source color with alpha 1.0.
void main()
{
    vec3 blurred = tex2Dblur17fastest(Source, tex_uv, weight_sum_inv, w1_8, w1_8_ratio);
    FragColor = vec4(blurred, 1.0);
}
|
150
crt/shaders/crt-royale/src-fast/crt-royale-brightpass.slang
Normal file
150
crt/shaders/crt-royale/src-fast/crt-royale-brightpass.slang
Normal file
@ -0,0 +1,150 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
layout(push_constant) uniform Push
|
||||
{
|
||||
vec4 SourceSize;
|
||||
vec4 OriginalSize;
|
||||
vec4 OutputSize;
|
||||
uint FrameCount;
|
||||
vec4 MASKED_SCANLINESSize;
|
||||
} params;
|
||||
|
||||
#define MASKED_SCANLINEStexture MASKED_SCANLINES
|
||||
#define MASKED_SCANLINEStexture_size params.MASKED_SCANLINESSize.xy
|
||||
#define MASKED_SCANLINESvideo_size params.MASKED_SCANLINESSize.xy
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
|
||||
/////////////////////////////// VERTEX INCLUDES ///////////////////////////////
|
||||
|
||||
float bloom_diff_thresh_ = 1.0/256.0;
|
||||
float mask_min_allowed_tile_size = ceil(mask_min_allowed_triad_size * mask_triads_per_tile);
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 tex_uv;
|
||||
layout(location = 1) out float center_weight;
|
||||
layout(location = 2) out float undim_mask_contrast_factors;
|
||||
|
||||
// copied from bloom-functions.h
|
||||
// Closed-form estimate of the smallest Gaussian sigma that blurs a phosphor
// triad of width triad_size to an even color, within a difference of thresh.
float get_min_sigma_to_blur_triad(float triad_size, float thresh)
{
    float root_term = sqrt(0.000416 + thresh);
    return -0.05168 + 0.6113*triad_size - 1.122*triad_size*root_term;
}
|
||||
|
||||
// Fast estimate of 1/(sum of Gaussian blur weights) for the given sigma.
// Returns the smaller of a fitted double-exponential term and a 1/sigma term.
float get_fast_gaussian_weight_sum_inv(float sigma)
{
    float fitted_term = exp(exp(0.348348412457428/(sigma - 0.0860587260734721)));
    float inverse_sigma_term = 0.399334576340352/sigma;
    return min(fitted_term, inverse_sigma_term);
}
|
||||
|
||||
// Returns the integer (floored) size of one resized phosphor-mask tile.
// estimated_viewport_size is accepted for interface compatibility but is not
// read here. When solemnly_swear_same_inputs_for_every_pass is true, the y
// size is additionally limited by the clamped x size to keep the tile aspect.
vec2 get_resized_mask_tile_size(vec2 estimated_viewport_size, vec2 estimated_mask_resize_output_size, bool solemnly_swear_same_inputs_for_every_pass)
{
    // Aspect ratio of the source mask tile (stated tile properties must be correct).
    float inv_aspect = mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
    float aspect = 1.0/inv_aspect;
    vec2 aspect_xy = vec2(1.0, inv_aspect);

    // Desired width, capped at the source LUT width so we never upsize.
    float wanted_x = mask_triads_per_tile * global.mask_triad_size_desired;
    float capped_x = min(wanted_x, mask_resize_src_lut_size.x);

    // Enforce the minimum and maximum tile size in both dimensions.
    vec2 lower_bound = mask_min_allowed_tile_size * aspect_xy;
    vec2 upper_bound = estimated_mask_resize_output_size / mask_resize_num_tiles;
    vec2 bounded = clamp(capped_x * aspect_xy, lower_bound, upper_bound);

    // Re-limit each axis by the size implied by the other axis.
    float x_from_y = bounded.y * aspect;
    float y_from_x = mix(bounded.y, bounded.x * inv_aspect, float(solemnly_swear_same_inputs_for_every_pass));
    vec2 refit = min(bounded, vec2(x_from_y, y_from_x));

    // Tiled sampling needs integer sizes. Floor so we never round up; the
    // FIX_ZERO offset guards against floor decreasing exact whole numbers.
    return floor(refit + vec2(FIX_ZERO(0.0)));
}
|
||||
|
||||
// Vertex stage of the brightpass: passes the uv through and precomputes the
// blur center weight and the combined undim/mask-amplify/contrast factor.
void main()
{
    gl_Position = global.MVP * Position;
    tex_uv = TexCoord;

    // Runtime bloom sigma, derived from the resized mask tile width.
    vec2 resize_output_estimate = params.OutputSize.xy * mask_resize_viewport_scale;
    vec2 tile_size = get_resized_mask_tile_size(params.OutputSize.xy, resize_output_estimate, false);
    float triad_size = tile_size.x / mask_triads_per_tile;
    float sigma = get_min_sigma_to_blur_triad(triad_size, bloom_diff_thresh_);

    center_weight = get_fast_gaussian_weight_sum_inv(sigma);

    float undim_factor = 1.0/levels_autodim_temp;
    undim_mask_contrast_factors = undim_factor * get_mask_amplify() * levels_contrast;
}
|
||||
|
||||
#pragma stage fragment
|
||||
layout(location = 0) in vec2 tex_uv;
|
||||
layout(location = 1) in float center_weight;
|
||||
layout(location = 2) in float undim_mask_contrast_factors;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D MASKED_SCANLINES;
|
||||
layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;
|
||||
|
||||
////////////////////////////// FRAGMENT INCLUDES //////////////////////////////
|
||||
|
||||
// Fragment stage of the brightpass: decides what fraction of each masked
// scanline pixel should be sent on to the bloom blur.
void main()
{
    vec3 ones = vec3(1.0, 1.0, 1.0);

    // Masked scanlines, then the full intensity after auto-undimming and
    // mask compensation.
    vec3 intensity_dim = texture(MASKED_SCANLINEStexture, tex_uv).rgb;
    vec3 intensity = intensity_dim * undim_mask_contrast_factors;

    // The contrast-scaled ORIG_LINEARIZED sample stands in for a straight blur
    // of the masked scanlines. It estimates how much energy this pixel will
    // receive from blooming neighbors.
    vec3 phosphor_blur_approx = levels_contrast * texture(ORIG_LINEARIZED, tex_uv).rgb;

    // Maximum energy expected from neighbors: the blur estimate minus this
    // pixel's own center-tap contribution, never below zero.
    vec3 max_area_contribution_approx = max(vec3(0.0, 0.0, 0.0), phosphor_blur_approx - center_weight * intensity);

    // Assume neighbors blur 100% of their intensity (blur_ratio = 1.0), which
    // is simple and gives better results, but scale all intensities by the
    // user's desired underestimate factor.
    vec3 area_contrib_underestimate = bloom_underestimate_levels * max_area_contribution_approx;
    vec3 intensity_underestimate = bloom_underestimate_levels * intensity;

    // blur_ratio is the ratio of intensity we want to blur, clamped to [0, 1].
    vec3 headroom = ones - area_contrib_underestimate;
    vec3 blur_ratio_temp = (headroom / intensity_underestimate - ones) / (center_weight - 1.0);
    vec3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);

    // The brightpass is taken from the auto-dimmed, unamplified, masked
    // scanlines; bloom_excess blends the ratio toward blurring everything.
    vec3 brightpass = intensity_dim * mix(blur_ratio, ones, global.bloom_excess);

    FragColor = vec4(brightpass, 1.0);
}
|
@ -0,0 +1,97 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
layout(push_constant) uniform Push
|
||||
{
|
||||
vec4 SourceSize;
|
||||
vec4 OriginalSize;
|
||||
vec4 OutputSize;
|
||||
uint FrameCount;
|
||||
} params;
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
#include "scanline-functions.h"
|
||||
|
||||
#define GAMMA_IN(color) pow(color, vec3(crt_gamma))
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 tex_uv;
|
||||
layout(location = 1) out vec2 uv_step;
|
||||
layout(location = 2) out float interlaced;
|
||||
|
||||
// Vertex stage of the first pass: emits the (slightly scaled) uv, the size of
// one source texel in uv units, and an interlacing flag for the fragment stage.
void main()
{
    gl_Position = global.MVP * Position;
    tex_uv = TexCoord * 1.00001;
    uv_step = vec2(1.0)/params.SourceSize.xy;

    // Detect interlacing from the source height: 1.0 = true, 0.0 = false.
    interlaced = float(is_interlaced(params.SourceSize.y));
}
|
||||
|
||||
#pragma stage fragment
|
||||
#pragma format R8G8B8A8_SRGB
|
||||
layout(location = 0) in vec2 tex_uv;
|
||||
layout(location = 1) in vec2 uv_step;
|
||||
layout(location = 2) in float interlaced;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
#define input_texture Source
|
||||
|
||||
// Fragment stage of the first pass: linearizes the input with the CRT gamma
// (GAMMA_IN) and, when interlace detection is enabled, bobs interlaced fields
// so later passes can blur immediately without artifacts.
// Note: TFF/BFF won't matter for sources that double-weave or similar.
void main()
{
    // Interlace detection disabled: just decode gamma.
    if(!bool(interlace_detect))
    {
        FragColor = vec4(GAMMA_IN(texture(input_texture, tex_uv).rgb), 1.0);
        return;
    }

    // Sample the current line and the previous/next lines, decoding CRT gamma.
    vec2 line_step = vec2(0.0, uv_step.y);
    vec3 curr_line = GAMMA_IN(texture(input_texture, tex_uv            ).rgb);
    vec3 prev_line = GAMMA_IN(texture(input_texture, tex_uv - line_step).rgb);
    vec3 next_line = GAMMA_IN(texture(input_texture, tex_uv + line_step).rgb);
    vec3 neighbor_average = 0.5 * (prev_line + next_line);

    // If we're interlacing, determine which field the current line is in.
    // With interlaced == 0.0 the modulus is 1.0 and wrong_field stays 0.0.
    float modulus = interlaced + 1.0;
    float field_offset = mod(params.FrameCount + global.interlace_bff, modulus);
    float texel_row = tex_uv.y * params.SourceSize.y;

    // under_half fixes a rounding bug around exact texel locations.
    float row_index = floor(texel_row - under_half);
    float wrong_field = mod(row_index + field_offset, modulus);

    // Keep the current line in the active field; use the neighbor average in
    // the other field. The choice is made with mix() rather than a branch.
    vec3 color = mix(curr_line, neighbor_average, wrong_field);
    FragColor = vec4(color, 1.0);
}
|
@ -0,0 +1,114 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
layout(push_constant) uniform Push
|
||||
{
|
||||
vec4 SourceSize;
|
||||
vec4 OriginalSize;
|
||||
vec4 OutputSize;
|
||||
uint FrameCount;
|
||||
} params;
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "phosphor-mask-resizing.h"
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 src_tex_uv_wrap;
|
||||
layout(location = 1) out vec2 tile_uv_wrap;
|
||||
layout(location = 2) out vec2 resize_magnification_scale;
|
||||
layout(location = 3) out vec2 src_dxdy;
|
||||
layout(location = 4) out vec2 tile_size_uv;
|
||||
layout(location = 5) out vec2 input_tiles_per_texture;
|
||||
|
||||
// Vertex stage of the horizontal mask resize: computes wrapped tile/texture
// uv coords and the scale factors the fragment stage needs for tiled resizing.
void main()
{
    gl_Position = global.MVP * Position;

    // Estimate the viewport size first (the user will get the wrong number of
    // triads if it's wrong and mask_specify_num_triads is 1.0/true).
    vec2 viewport_estimate = params.OutputSize.xy / mask_resize_viewport_scale;

    // Final size of the resized mask tiles. The previous pass probably
    // estimated the viewport and MASK_RESIZE output sizes differently, so do
    // not swear the inputs were the same.
    vec2 final_tile_size = get_resized_mask_tile_size(viewport_estimate, params.OutputSize.xy, false);

    // Resized tiles are rendered until the output FBO is filled or a limit is
    // met, so derive [wrapped] tile uv coords from the output uv coords and
    // the number of tiles that fit in the FBO.
    vec2 tiles_in_output = params.OutputSize.xy / final_tile_size;
    tile_uv_wrap = TexCoord * tiles_in_output;

    // Texel size of an input tile and related values.
    float input_tile_width = min(mask_resize_src_lut_size.x, params.SourceSize.x);
    vec2 input_tile_size = vec2(input_tile_width, final_tile_size.y);
    tile_size_uv = input_tile_size / params.SourceSize.xy;
    input_tiles_per_texture = params.SourceSize.xy / input_tile_size;

    // [Wrapped] texture uv coords; fract() is left to the fragment shader.
    src_tex_uv_wrap = tile_uv_wrap * tile_size_uv;

    // Magnification scale and the horizontal source texel step.
    resize_magnification_scale = final_tile_size / input_tile_size;
    src_dxdy = vec2(1.0/params.SourceSize.x, 0.0);
}
|
||||
|
||||
#pragma stage fragment
|
||||
layout(location = 0) in vec2 src_tex_uv_wrap;
|
||||
layout(location = 1) in vec2 tile_uv_wrap;
|
||||
layout(location = 2) in vec2 resize_magnification_scale;
|
||||
layout(location = 3) in vec2 src_dxdy;
|
||||
layout(location = 4) in vec2 tile_size_uv;
|
||||
layout(location = 5) in vec2 input_tiles_per_texture;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
#define input_texture Source
|
||||
|
||||
// Fragment stage of the horizontal mask resize.
// The input contains one mask tile horizontally and a number vertically.
// The tile is resized horizontally to its final screen size and repeated
// until at least mask_resize_num_tiles are drawn; it is left unchanged
// vertically. Lanczos-resizing the phosphor mask is much sharper than
// mipmapping, and outputting >= mask_resize_num_tiles makes tiled sampling
// easier later.
void main()
{
    // Discard unneeded fragments in case our profile allows real branches.
    if(!(max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles))
    {
        discard;
    }
    else
    {
        vec2 src_tex_uv = fract(src_tex_uv_wrap);
        vec3 resized_color = downsample_horizontal_sinc_tiled(input_texture, src_tex_uv, params.SourceSize.xy, src_dxdy.x, resize_magnification_scale.x, tile_size_uv.x);

        // The input LUT was linear RGB, and so is our output.
        FragColor = vec4(resized_color, 1.0);
    }
}
|
@ -0,0 +1,120 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
// Push-constant block for the vertical mask-resize pass.
// Member order defines the block layout, so it must not be rearranged.
layout(push_constant) uniform Push
{
    vec4 SourceSize;
    vec4 OriginalSize;
    vec4 OutputSize;    // .xy is read by the vertex stage of this pass
    uint FrameCount;
} params;
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "phosphor-mask-resizing.h"
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 src_tex_uv_wrap;
|
||||
layout(location = 1) out vec2 resize_magnification_scale;
|
||||
|
||||
// Vertex stage of the vertical mask-resize pass.
// Writes the wrapped (un-fract'ed) tile coordinate and the per-axis
// magnification scale that the fragment stage feeds to the sinc resampler.
void main()
{
    gl_Position = global.MVP * Position;

    // This pass's own output size doubles as the estimate of the
    // MASK_RESIZE output size handed to get_resized_mask_tile_size().
    vec2 out_size = params.OutputSize.xy;

    // Viewport estimate: output size divided by the y viewport scale on
    // both axes.
    vec2 viewport_guess = out_size / mask_resize_viewport_scale.yy;

    // Final resized tile size; only its y component is used here.
    vec2 final_tile_size = get_resized_mask_tile_size(viewport_guess, out_size, false);

    // Tile size produced by this pass: the source LUT width (capped by the
    // output width) horizontally, and the final tile height vertically.
    float tile_width = min(mask_resize_src_lut_size.x, out_size.x);
    vec2 tile_size_this_pass = vec2(tile_width, final_tile_size.y);

    // Number of tiles that fit in the output; scaling TexCoord by it gives
    // a tile coordinate that the fragment stage wraps with fract().
    vec2 tiles_in_output = out_size / tile_size_this_pass;

    src_tex_uv_wrap = TexCoord * tiles_in_output;
    resize_magnification_scale = tile_size_this_pass / mask_resize_src_lut_size;
}
|
||||
|
||||
#pragma stage fragment
|
||||
layout(location = 0) in vec2 src_tex_uv_wrap;
|
||||
layout(location = 1) in vec2 resize_magnification_scale;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
layout(set = 0, binding = 3) uniform sampler2D mask_grille_texture_small;
|
||||
layout(set = 0, binding = 4) uniform sampler2D mask_slot_texture_small;
|
||||
layout(set = 0, binding = 5) uniform sampler2D mask_shadow_texture_small;
|
||||
|
||||
// Fragment stage of the vertical mask-resize pass.
// Resamples the selected phosphor mask LUT vertically with
// downsample_vertical_sinc_tiled(). Fragments whose wrapped tile coordinate
// lies beyond mask_resize_num_tiles are discarded.
void main()
{
    if(!(src_tex_uv_wrap.y <= mask_resize_num_tiles))
    {
        discard;
    }
    else
    {
        // Vertical step of one source LUT texel, in uv units.
        float src_dy = 1.0/mask_resize_src_lut_size.y;

        // Wrap the tiled coordinate back into the single input tile.
        vec2 src_tex_uv = fract(src_tex_uv_wrap);

        float magnification_y = resize_magnification_scale.y;
        vec3 pixel_color;

        // mask_type selects the LUT: < 0.5 grille, < 1.5 slot, else shadow.
        if(mask_type < 0.5)
        {
            pixel_color = downsample_vertical_sinc_tiled(mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size, src_dy, magnification_y, 1.0);
        }
        else if(mask_type < 1.5)
        {
            pixel_color = downsample_vertical_sinc_tiled(mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size, src_dy, magnification_y, 1.0);
        }
        else
        {
            pixel_color = downsample_vertical_sinc_tiled(mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size, src_dy, magnification_y, 1.0);
        }

        // Emit the resampled color unchanged, with opaque alpha.
        FragColor = vec4(pixel_color, 1.0);
    }
}
|
@ -0,0 +1,112 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
// Push-constant block for the mask-apply pass.
// Member order defines the block layout, so it must not be rearranged.
layout(push_constant) uniform Push
{
    vec4 SourceSize;
    vec4 OriginalSize;
    vec4 OutputSize;
    vec4 VERTICAL_SCANLINESSize;    // .xy is aliased by the macros below
    vec4 MASK_RESIZESize;           // .xy is aliased by the macros below
} params;
|
||||
|
||||
// Aliases for the VERTICAL_SCANLINES sampler and its size. Both the
// texture_size and video_size aliases expand to the same .xy value.
#define VERTICAL_SCANLINEStexture VERTICAL_SCANLINES
#define VERTICAL_SCANLINEStexture_size params.VERTICAL_SCANLINESSize.xy
#define VERTICAL_SCANLINESvideo_size params.VERTICAL_SCANLINESSize.xy

// Aliases for the MASK_RESIZE sampler and its size (same convention).
#define MASK_RESIZEtexture MASK_RESIZE
#define MASK_RESIZEtexture_size params.MASK_RESIZESize.xy
#define MASK_RESIZEvideo_size params.MASK_RESIZESize.xy

// NOTE(review): this ratio divides OutputSize.x by SourceSize.y (x by y);
// confirm that is intended. Neither global below is read by this file's
// own main() functions; they may be read by the included headers.
float bloom_approx_scale_x = params.OutputSize.x / params.SourceSize.y;
float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
|
||||
|
||||
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
|
||||
/////////////////////////////// VERTEX INCLUDES ///////////////////////////////
|
||||
|
||||
#include "scanline-functions.h"
|
||||
#include "phosphor-mask-resizing.h"
|
||||
|
||||
/////////////////////////////////// HELPERS //////////////////////////////////
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 video_uv;
|
||||
layout(location = 1) out vec2 scanline_texture_size_inv;
|
||||
layout(location = 2) out vec4 mask_tile_start_uv_and_size;
|
||||
layout(location = 3) out vec2 mask_tiles_per_screen;
|
||||
|
||||
// Vertex stage of the mask-apply pass.
// Forwards the texture coordinate, the inverse VERTICAL_SCANLINES size, and
// the mask tile sampling parameters to the fragment stage.
void main()
{
    gl_Position = global.MVP * Position;
    video_uv = TexCoord;

    // Reciprocal of the VERTICAL_SCANLINES size, per axis.
    scanline_texture_size_inv = vec2(1.0, 1.0)/params.VERTICAL_SCANLINESSize.xy;

    // The same MASK_RESIZE size is passed as both "texture" and "video" size.
    vec2 resized_mask_size = params.MASK_RESIZESize.xy;

    // Returns (tile start uv, tile uv size) and also fills the
    // mask_tiles_per_screen output through its out parameter.
    mask_tile_start_uv_and_size = get_mask_sampling_parameters(resized_mask_size, resized_mask_size, params.OutputSize.xy, mask_tiles_per_screen);
}
|
||||
|
||||
#pragma stage fragment
|
||||
layout(location = 0) in vec2 video_uv;
|
||||
layout(location = 1) in vec2 scanline_texture_size_inv;
|
||||
layout(location = 2) in vec4 mask_tile_start_uv_and_size;
|
||||
layout(location = 3) in vec2 mask_tiles_per_screen;
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
layout(set = 0, binding = 3) uniform sampler2D VERTICAL_SCANLINES;
|
||||
layout(set = 0, binding = 4) uniform sampler2D MASK_RESIZE;
|
||||
|
||||
////////////////////////////// FRAGMENT INCLUDES //////////////////////////////
|
||||
|
||||
// Fragment stage of the mask-apply pass.
// Samples the vertically interpolated scanlines horizontally, multiplies the
// result by the resized phosphor mask, and writes it with opaque alpha.
void main()
{
    // Horizontal scanline sample from the VERTICAL_SCANLINES pass output.
    vec3 scanline_rgb = sample_rgb_scanline_horizontal(VERTICAL_SCANLINEStexture, video_uv, VERTICAL_SCANLINEStexture_size, scanline_texture_size_inv);

    // Map screen uv to a wrapped tile coordinate, then to a uv inside the
    // resized mask tile stored in MASK_RESIZE.
    vec2 wrapped_tile_uv = video_uv * mask_tiles_per_screen;
    vec2 mask_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(wrapped_tile_uv, mask_tile_start_uv_and_size);
    vec3 mask_rgb = texture(MASK_RESIZEtexture, mask_uv).rgb;

    // Apply the mask as a per-channel multiply.
    FragColor = vec4(scanline_rgb * mask_rgb, 1.0);
}
|
@ -0,0 +1,126 @@
|
||||
#version 450
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
// Push-constant block for the vertical-scanlines pass.
// Member order defines the block layout, so it must not be rearranged.
layout(push_constant) uniform Push
{
    vec4 SourceSize;
    vec4 OriginalSize;
    vec4 OutputSize;            // .y is read by the vertex stage
    uint FrameCount;            // converted to float in the fragment stage
    vec4 ORIG_LINEARIZEDSize;   // .xy used as size, .zw as inverse size below
} params;
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "bind-shader-params.h"
|
||||
#include "scanline-functions.h"
|
||||
|
||||
#pragma stage vertex
|
||||
layout(location = 0) in vec4 Position;
|
||||
layout(location = 1) in vec2 TexCoord;
|
||||
layout(location = 0) out vec2 tex_uv;
|
||||
layout(location = 1) out vec2 uv_step; // uv size of a texel (x) and scanline (y)
|
||||
layout(location = 2) out vec2 il_step_multiple; // (1, 1) = progressive, (1, 2) = interlaced
|
||||
layout(location = 3) out float pixel_height_in_scanlines; // Height of an output pixel in scanlines
|
||||
|
||||
// Vertex stage of the vertical-scanlines pass.
// Computes the per-line step multiple (1 or 2 depending on is_interlaced()),
// the uv step of one texel / one scanline, and the height of an output pixel
// measured in scanlines.
void main()
{
    gl_Position = global.MVP * Position;

    // The texture coordinate is scaled by a factor very slightly above 1.
    tex_uv = TexCoord * 1.00001;

    vec2 src_size = params.ORIG_LINEARIZEDSize.xy;

    // 1.0 when is_interlaced() is false, 2.0 when it is true.
    float line_stride = 1.0 + float(is_interlaced(src_size.y));
    vec2 step_multiple = vec2(1.0, line_stride);
    il_step_multiple = step_multiple;

    // uv distance between texels (x) and between scanlines (y).
    uv_step = step_multiple / src_size;

    // Source lines per output pixel, divided by the line stride.
    pixel_height_in_scanlines = (src_size.y / params.OutputSize.y) / step_multiple.y;
}
|
||||
|
||||
#pragma stage fragment
|
||||
#pragma format R8G8B8A8_SRGB
|
||||
layout(location = 0) in vec2 tex_uv;
|
||||
layout(location = 1) in vec2 uv_step; // uv size of a texel (x) and scanline (y)
|
||||
layout(location = 2) in vec2 il_step_multiple; // (1, 1) = progressive, (1, 2) = interlaced
|
||||
layout(location = 3) in float pixel_height_in_scanlines; // Height of an output pixel in scanlines
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;
|
||||
|
||||
#define input_texture ORIG_LINEARIZED
|
||||
|
||||
// Fragment stage of the vertical-scanlines pass.
// Sums the contributions of three scanlines (the one at/before this sample,
// the next one, and one "outside" neighbor chosen by rounding the distance)
// and scales the sum by levels_autodim_temp.
void main()
{
    // ORIG_LINEARIZEDSize carries the size in .xy and its inverse in .zw.
    vec2 texture_size_ = params.ORIG_LINEARIZEDSize.xy;
    vec2 texture_size_inv = params.ORIG_LINEARIZEDSize.zw;

    float frame_count = float(params.FrameCount);
    float ph = pixel_height_in_scanlines;

    // get_last_scanline_uv() returns the uv of the previous scanline and
    // writes this sample's distance from it (in scanlines) into dist.
    float dist;
    vec2 scanline_uv = get_last_scanline_uv(tex_uv, texture_size_, texture_size_inv, il_step_multiple, frame_count, dist);

    // Scanline 2 is the previous scanline and scanline 3 the one after it.
    vec2 v_step = vec2(0.0, uv_step.y);
    vec3 scanline2_color = texture(input_texture, scanline_uv).rgb;
    vec3 scanline3_color = texture(input_texture, scanline_uv + v_step).rgb;

    // The original notes that dist is in [0, 1]. Rounding it selects the
    // third tap: 0 -> the line before scanline 2 (offset -v_step),
    // 1 -> the line after scanline 3 (offset 2 * v_step).
    float dist_round = round(dist);
    vec2 sample_1or4_uv_off = mix(-v_step, 2.0 * v_step, dist_round);
    vec3 scanline_outside_color = texture(input_texture, scanline_uv + sample_1or4_uv_off).rgb;

    // dist2 is the sample's distance from scanline 2, replicated per channel.
    vec3 dist2 = vec3(dist);

    // Contributions are summed without normalization.
    vec3 scanline2_contrib = scanline_contrib(dist2, scanline2_color, ph, sigma_range, shape_range);
    vec3 scanline3_contrib = scanline_contrib(abs(vec3(1.0,1.0,1.0) - dist2), scanline3_color, ph, sigma_range, shape_range);
    vec3 scanline_intensity = scanline2_contrib + scanline3_contrib;

    // Distance to the outside tap: dist + 1 for the line before scanline 2,
    // 2 - dist for the line after scanline 3.
    vec3 dist1or4 = mix(dist2 + vec3(1.0,1.0,1.0), vec3(2.0,2.0,2.0) - dist2, dist_round);
    vec3 scanline1or4_contrib = scanline_contrib(dist1or4, scanline_outside_color, ph, sigma_range, shape_range);
    scanline_intensity += scanline1or4_contrib;

    // Scale by the temporary auto-dim factor and output with opaque alpha.
    FragColor = vec4(scanline_intensity * levels_autodim_temp, 1.0);
}
|
271
crt/shaders/crt-royale/src-fast/phosphor-mask-resizing.h
Normal file
271
crt/shaders/crt-royale/src-fast/phosphor-mask-resizing.h
Normal file
@ -0,0 +1,271 @@
|
||||
#ifndef PHOSPHOR_MASK_RESIZING_H
|
||||
#define PHOSPHOR_MASK_RESIZING_H
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////// CODEPATH SELECTION /////////////////////////////
|
||||
|
||||
#define USE_SINGLE_STATIC_LOOP
|
||||
|
||||
////////////////////////////////// CONSTANTS /////////////////////////////////
|
||||
|
||||
// Smallest tile size (in pixels) the resizer may produce: the minimum triad
// size times the triads per tile, rounded up.
float mask_min_allowed_tile_size = ceil(mask_min_allowed_triad_size * mask_triads_per_tile);

// Smallest tile size assumed when sizing the sinc loop; here it is simply
// the allowed minimum.
float mask_min_expected_tile_size = mask_min_allowed_tile_size;

// pi divided by the sinc lobe count; used by the Lanczos-window weights.
float pi_over_lobes = pi/mask_sinc_lobes;

// Upper bound on sinc taps: two sides of mask_sinc_lobes lobes, scaled by
// the largest minification factor (source LUT width / smallest tile size).
float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes * mask_resize_src_lut_size.x/mask_min_expected_tile_size;

// The resampling loops take 4 taps per iteration, so round up to a
// multiple of 4.
float max_sinc_resize_samples_m4 = ceil(max_sinc_resize_samples_float * 0.25) * 4.0;
|
||||
|
||||
|
||||
///////////////////////// RESAMPLING FUNCTION HELPERS ////////////////////////
|
||||
|
||||
|
||||
// Locate the first tap of a sinc resample along one axis.
//   tex_uv, tex_size:  sample position (uv) and texture size (texels)
//   dr:                uv step between taps along the resized axis
//   input_tiles_per_texture_r:  tiles per texture along the resized axis
//   samples:           total tap count
//   vertical:          true selects the y axis, false the x axis
// Returns vec2(tile-uv coordinate of the first tap, distance in texels from
// the sample position to the first tap) for the selected axis.
vec2 get_first_texel_tile_uv_and_dist(vec2 tex_uv, vec2 tex_size, float dr, float input_tiles_per_texture_r, float samples, bool vertical)
{
    // Sample position in texels, and the texel center at or before it.
    vec2 sample_texel = tex_uv * tex_size;
    vec2 prev_center = floor(sample_texel - vec2(under_half)) + vec2(0.5);

    // Step back (samples/2 - 1) texels to reach the first tap.
    vec2 first_tap = prev_center - vec2(samples/2.0 - 1.0);
    vec2 first_tap_dist = sample_texel - first_tap;

    // Texels -> wrapped texture uv -> wrapped tile uv.
    vec2 first_tap_tile_uv_wrap = (first_tap * dr) * input_tiles_per_texture_r;

    // Wrap into the tile. The first tap may be negative, so 1.0 is added
    // for negative coordinates.
    // NOTE(review): GLSL fract() is x - floor(x), which is already in
    // [0, 1) for negative x, so this lands in [1, 2); the loop-body macros
    // apply fract() again to the summed coordinate.
    vec2 negative_mask = vec2(lessThan(first_tap_tile_uv_wrap, vec2(0.0)));
    vec2 first_tap_tile_uv = fract(first_tap_tile_uv_wrap) + negative_mask;

    // Pick the axis being resized.
    if(vertical)
    {
        return vec2(first_tap_tile_uv.y, first_tap_dist.y);
    }
    return vec2(first_tap_tile_uv.x, first_tap_dist.x);
}
|
||||
|
||||
// Sample `tex` at `tex_uv` from mip level 0.
// Mipmapping and anisotropic filtering get confused by sinc resampling, so
// the level is requested explicitly. With an explicit LOD the lookup does
// not depend on implicit derivatives; the callers sample at fract()-wrapped
// coordinates from inside a conditional branch.
// For a texture without mipmaps the result is the same as texture().
vec4 tex2Dlod0try(sampler2D tex, vec2 tex_uv)
{
    return textureLod(tex, tex_uv, 0.0);
}
|
||||
|
||||
|
||||
////////////////////////////// LOOP BODY MACROS //////////////////////////////
|
||||
|
||||
|
||||
// The macros below are expanded inside the sinc resampling loops and refer
// to the enclosing function's locals by name: tex, tex_uv, i_base, i,
// tile_dr, tile_size_uv_r, first_texel_tile_uv_rrrr, first_dist_unscaled,
// magnification_scale, pixel_color and weight_sum.

// Tap indices for this iteration (4 consecutive taps), their wrapped tile
// coordinate along the resized axis, and the matching texture coordinate.
#define CALCULATE_R_COORD_FOR_4_SAMPLES \
    vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \
    vec4 tile_uv_r = fract(first_texel_tile_uv_rrrr + true_i * tile_dr); \
    vec4 tex_uv_r = tile_uv_r * tile_size_uv_r;

// Tap weights from `dist` and `pi_dist`: a Lanczos-windowed sinc when
// PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW is defined, otherwise a plain sinc.
// Both forms cap the weights at 1.0.
#ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
    vec4 pi_dist_over_lobes = pi_over_lobes * dist; \
    vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) / \
        (pi_dist*pi_dist_over_lobes), vec4(1.0));
#else
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
    vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0));
#endif

// Scale each tap's distance by the magnification, compute the four weights,
// and accumulate the weighted colors (new_sample0..3) and the weights.
#define UPDATE_COLOR_AND_WEIGHT_SUMS \
    vec4 dist = magnification_scale * \
        abs(first_dist_unscaled - true_i); \
    vec4 pi_dist = pi * dist; \
    CALCULATE_SINC_RESAMPLE_WEIGHTS; \
    pixel_color += new_sample0 * weights.xxx; \
    pixel_color += new_sample1 * weights.yyy; \
    pixel_color += new_sample2 * weights.zzz; \
    pixel_color += new_sample3 * weights.www; \
    weight_sum += weights;

// One loop iteration of the vertical resize: 4 taps varying in y.
#define VERTICAL_SINC_RESAMPLE_LOOP_BODY \
    CALCULATE_R_COORD_FOR_4_SAMPLES; \
    vec3 new_sample0 = tex2Dlod0try(tex, \
        vec2(tex_uv.x, tex_uv_r.x)).rgb; \
    vec3 new_sample1 = tex2Dlod0try(tex, \
        vec2(tex_uv.x, tex_uv_r.y)).rgb; \
    vec3 new_sample2 = tex2Dlod0try(tex, \
        vec2(tex_uv.x, tex_uv_r.z)).rgb; \
    vec3 new_sample3 = tex2Dlod0try(tex, \
        vec2(tex_uv.x, tex_uv_r.w)).rgb; \
    UPDATE_COLOR_AND_WEIGHT_SUMS;

// One loop iteration of the horizontal resize: 4 taps varying in x.
#define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY \
    CALCULATE_R_COORD_FOR_4_SAMPLES; \
    vec3 new_sample0 = tex2Dlod0try(tex, \
        vec2(tex_uv_r.x, tex_uv.y)).rgb; \
    vec3 new_sample1 = tex2Dlod0try(tex, \
        vec2(tex_uv_r.y, tex_uv.y)).rgb; \
    vec3 new_sample2 = tex2Dlod0try(tex, \
        vec2(tex_uv_r.z, tex_uv.y)).rgb; \
    vec3 new_sample3 = tex2Dlod0try(tex, \
        vec2(tex_uv_r.w, tex_uv.y)).rgb; \
    UPDATE_COLOR_AND_WEIGHT_SUMS;
|
||||
|
||||
|
||||
//////////////////////////// RESAMPLING FUNCTIONS ////////////////////////////
|
||||
|
||||
// Sinc-resample a tiled texture along y.
//   tex, tex_uv, tex_size:  source sampler, sample uv, source size (texels)
//   dr:                     uv step between taps along y
//   magnification_scale:    scale applied to tap distances before weighting
//   tile_size_uv_r:         size of one tile along y, in uv units
// Returns the weighted average color (weighted sum divided by the weight sum).
// The locals tex, tex_uv, i_base, i, tile_dr, first_texel_tile_uv_rrrr,
// first_dist_unscaled, pixel_color and weight_sum are read or written by
// VERTICAL_SINC_RESAMPLE_LOOP_BODY by name, so their names must be kept.
vec3 downsample_vertical_sinc_tiled(sampler2D tex, vec2 tex_uv, vec2 tex_size, float dr, float magnification_scale, float tile_size_uv_r)
{
    int tap_count = int(max_sinc_resize_samples_m4);
    float tiles_per_texture = 1.0/tile_size_uv_r;

    // First tap's tile coordinate (.x) and texel distance (.y); true = y axis.
    vec2 first_tap = get_first_texel_tile_uv_and_dist(tex_uv, tex_size, dr, tiles_per_texture, float(tap_count), true);
    vec4 first_texel_tile_uv_rrrr = vec4(first_tap.x);
    vec4 first_dist_unscaled = vec4(first_tap.y);

    // Tap-to-tap offset in tile coordinates.
    float tile_dr = dr * tiles_per_texture;

    int i_base = 0;
    vec4 weight_sum = vec4(0.0);
    vec3 pixel_color = vec3(0.0);

    // Each iteration handles 4 taps.
    for(int i = 0; i < tap_count; i += 4)
    {
        VERTICAL_SINC_RESAMPLE_LOOP_BODY;
    }

    // Collapse the four partial weight sums and normalize.
    float total_weight = (weight_sum.x + weight_sum.z) + (weight_sum.y + weight_sum.w);
    return pixel_color / total_weight;
}
|
||||
|
||||
// Sinc-resample a tiled texture along x.
//   tex, tex_uv, tex_size:  source sampler, sample uv, source size (texels)
//   dr:                     uv step between taps along x
//   magnification_scale:    scale applied to tap distances before weighting
//   tile_size_uv_r:         size of one tile along x, in uv units
// Returns the weighted average color (weighted sum divided by the weight sum).
// The locals tex, tex_uv, i_base, i, tile_dr, first_texel_tile_uv_rrrr,
// first_dist_unscaled, pixel_color and weight_sum are read or written by
// HORIZONTAL_SINC_RESAMPLE_LOOP_BODY by name, so their names must be kept.
vec3 downsample_horizontal_sinc_tiled(sampler2D tex, vec2 tex_uv, vec2 tex_size, float dr, float magnification_scale, float tile_size_uv_r)
{
    int tap_count = int(max_sinc_resize_samples_m4);
    float tiles_per_texture = 1.0/tile_size_uv_r;

    // First tap's tile coordinate (.x) and texel distance (.y); false = x axis.
    vec2 first_tap = get_first_texel_tile_uv_and_dist(tex_uv, tex_size, dr, tiles_per_texture, float(tap_count), false);
    vec4 first_texel_tile_uv_rrrr = vec4(first_tap.x);
    vec4 first_dist_unscaled = vec4(first_tap.y);

    // Tap-to-tap offset in tile coordinates.
    float tile_dr = dr * tiles_per_texture;

    int i_base = 0;
    vec4 weight_sum = vec4(0.0);
    vec3 pixel_color = vec3(0.0);

    // Each iteration handles 4 taps.
    for(int i = 0; i < tap_count; i += 4)
    {
        HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
    }

    // Collapse the four partial weight sums and normalize.
    float total_weight = (weight_sum.x + weight_sum.z) + (weight_sum.y + weight_sum.w);
    return pixel_color / total_weight;
}
|
||||
|
||||
|
||||
//////////////////////////// TILE SIZE CALCULATION ///////////////////////////
|
||||
|
||||
// Compute the integer size (in pixels) of one resized phosphor mask tile.
//   estimated_viewport_size:  accepted for interface compatibility; this
//       implementation does not read it
//   estimated_mask_resize_output_size:  bounds the tile so that
//       mask_resize_num_tiles tiles fit in the MASK_RESIZE output
//   solemnly_swear_same_inputs_for_every_pass:  when true, the y size is
//       additionally limited by the aspect-corrected x size
// Returns the floored tile size.
vec2 get_resized_mask_tile_size(vec2 estimated_viewport_size, vec2 estimated_mask_resize_output_size, bool solemnly_swear_same_inputs_for_every_pass)
{
    // Aspect ratio of the source LUT tile (height/width and its inverse).
    float height_over_width = mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
    float width_over_height = 1.0/height_over_width;
    vec2 aspect = vec2(1.0, height_over_width);

    // Desired width from the requested triad size, never above the source
    // LUT width (no upsizing).
    float wanted_width = mask_triads_per_tile * global.mask_triad_size_desired;
    float capped_width = min(wanted_width, mask_resize_src_lut_size.x);

    // Clamp both dimensions between the allowed minimum and the largest
    // size that still fits mask_resize_num_tiles tiles in the output.
    vec2 unclamped = capped_width * aspect;
    vec2 lower = mask_min_allowed_tile_size * aspect;
    vec2 upper = estimated_mask_resize_output_size / mask_resize_num_tiles;
    vec2 clamped = clamp(unclamped, lower, upper);

    // Re-limit each dimension by the size implied by the other one. The y
    // limit only applies when the caller swears the inputs match across
    // passes; otherwise the mix() selects clamped.y itself.
    float width_from_height = clamped.y * width_over_height;
    float height_from_width = mix(clamped.y, clamped.x * height_over_width, float(solemnly_swear_same_inputs_for_every_pass));
    vec2 reclamped = vec2(min(clamped.x, width_from_height), min(clamped.y, height_from_width));

    // Tiled sampling needs integer tile sizes; round down.
    return floor(reclamped + vec2(FIX_ZERO(0.0)));
}
|
||||
|
||||
|
||||
///////////////////////// FINAL MASK SAMPLING HELPERS ////////////////////////
|
||||
|
||||
// Compute where one resized mask tile lives inside the MASK_RESIZE texture.
//   mask_resize_texture_size:  size of the MASK_RESIZE texture
//   mask_resize_video_size:    size passed on to get_resized_mask_tile_size()
//   true_viewport_size:        viewport size used for the tile count
//   mask_tiles_per_screen:     (out) viewport size divided by tile size
// Returns vec4(tile start uv, tile size in uv).
vec4 get_mask_sampling_parameters(vec2 mask_resize_texture_size, vec2 mask_resize_video_size, vec2 true_viewport_size, out vec2 mask_tiles_per_screen)
{
    vec2 tile_size = get_resized_mask_tile_size(true_viewport_size, mask_resize_video_size, false);

    // The resized tile is a fraction of the texture and starts at an
    // offset of mask_start_texels texels.
    vec2 tile_uv_size = tile_size / mask_resize_texture_size;
    vec2 tiles_skipped = mask_start_texels/tile_size;
    vec2 tile_start_uv = tiles_skipped * tile_uv_size;

    // The tile count is based on the viewport size the caller supplies.
    mask_tiles_per_screen = true_viewport_size / tile_size;

    return vec4(tile_start_uv, tile_uv_size);
}
|
||||
|
||||
// Map a wrapped tile coordinate to a uv inside the resized mask texture.
//   tile_uv_wrap:  tile coordinate that may exceed [0, 1)
//   mask_tile_start_uv_and_size:  .xy = tile start uv, .zw = tile uv size
// Returns start + fract(tile_uv_wrap) * size.
vec2 convert_phosphor_tile_uv_wrap_to_tex_uv(vec2 tile_uv_wrap, vec4 mask_tile_start_uv_and_size)
{
    vec2 start_uv = mask_tile_start_uv_and_size.xy;
    vec2 size_uv = mask_tile_start_uv_and_size.zw;

    return start_uv + fract(tile_uv_wrap) * size_uv;
}
|
||||
|
||||
|
||||
#endif // PHOSPHOR_MASK_RESIZING_H
|
||||
|
309
crt/shaders/crt-royale/src-fast/scanline-functions.h
Normal file
309
crt/shaders/crt-royale/src-fast/scanline-functions.h
Normal file
@ -0,0 +1,309 @@
|
||||
#ifndef SCANLINE_FUNCTIONS_H
|
||||
#define SCANLINE_FUNCTIONS_H
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "special-functions.h"
|
||||
|
||||
///////////////////////////// SCANLINE FUNCTIONS /////////////////////////////
|
||||
|
||||
// Per-channel beam sigma as a function of color.
// beam_spot_shape_function < 0.5 selects a power curve; any other value
// selects a spherical curve. Both start from beam_min_sigma and add up to
// sigma_range on top of it.
vec3 get_gaussian_sigma(vec3 color, float sigma_range)
{
    bool use_power_curve = beam_spot_shape_function < 0.5;

    if(use_power_curve)
    {
        // sigma = min + range * color^beam_spot_power
        return vec3(beam_min_sigma) + sigma_range * pow(color, vec3(beam_spot_power));
    }

    // sigma = min + range * sqrt(1 - (color - 1)^2)
    vec3 c_minus_one = color - vec3(1.0);
    return vec3(beam_min_sigma) + sigma_range * sqrt(vec3(1.0) - c_minus_one*c_minus_one);
}
|
||||
|
||||
// Per-channel shape parameter (beta) of the generalized Gaussian:
// beam_min_shape + shape_range * color^beam_shape_power.
vec3 get_generalized_gaussian_beta(vec3 color, float shape_range)
{
    vec3 shaped_color = pow(color, vec3(beam_shape_power));

    return beam_min_shape + shape_range * shaped_color;
}
|
||||
|
||||
// Scanline contribution obtained by integrating a Gaussian beam profile
// across one output pixel.
//   dist:          distance from the scanline, in scanlines (per channel)
//   color:         scanline color; also determines sigma
//   pixel_height:  output pixel height in scanlines
// Returns color * 0.5 * (erf(high) - erf(low)) / pixel_height.
// erf() is not a GLSL built-in; presumably provided by special-functions.h.
vec3 scanline_gaussian_integral_contrib(vec3 dist, vec3 color, float pixel_height, float sigma_range)
{
    vec3 sigma = get_gaussian_sigma(color, sigma_range);
    vec3 inv_sigma_root2 = 1.0/(sigma*sqrt(2.0));

    // Integration bounds: half a pixel on either side of the sample.
    vec3 half_pixel = vec3(pixel_height * 0.5);
    vec3 upper = erf((dist + half_pixel)*inv_sigma_root2);
    vec3 lower = erf((dist - half_pixel)*inv_sigma_root2);

    return color * 0.5*(upper - lower)/pixel_height;
}
|
||||
|
||||
// Scanline contribution obtained by integrating a generalized Gaussian beam
// profile across one output pixel.
//   dist:          distance from the scanline, in scanlines (per channel)
//   color:         scanline color; determines alpha (sqrt(2) * sigma) and beta
//   pixel_height:  output pixel height in scanlines
// Returns color * 0.5 * (high - low) / pixel_height, where each bound is
// sign(d) * normalized_ligamma_impl(1/beta, (|d|/alpha)^beta, ...).
vec3 scanline_generalized_gaussian_integral_contrib(vec3 dist, vec3 color, float pixel_height, float sigma_range, float shape_range)
{
    vec3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    vec3 beta = get_generalized_gaussian_beta(color, shape_range);
    vec3 inv_alpha = vec3(1.0)/alpha;
    vec3 inv_beta = vec3(1.0)/beta;

    // Reciprocal of gamma(1/beta), passed to the incomplete-gamma helper.
    vec3 inv_gamma = vec3(1.0)/gamma_impl(inv_beta, beta);

    // Integration bounds: half a pixel on either side of the sample.
    vec3 half_pixel = vec3(pixel_height * 0.5);
    vec3 d_high = dist + half_pixel;
    vec3 d_low = dist - half_pixel;

    vec3 upper = sign(d_high) * normalized_ligamma_impl(inv_beta, pow(abs(d_high)*inv_alpha, beta), beta, inv_gamma);
    vec3 lower = sign(d_low) * normalized_ligamma_impl(inv_beta, pow(abs(d_low)*inv_alpha, beta), beta, inv_gamma);

    return color * 0.5*(upper - lower)/pixel_height;
}
|
||||
|
||||
// Scanline contribution from point-sampling a Gaussian beam profile.
//   dist:          distance from the scanline, in scanlines (per channel)
//   color:         scanline color; also determines sigma
//   pixel_height:  output pixel height in scanlines (antialiased path only)
// With beam_antialias_level > 0.5 three samples are averaged (at dist and at
// +/- one third of a pixel); otherwise a single sample at dist is used.
vec3 scanline_gaussian_sampled_contrib(vec3 dist, vec3 color, float pixel_height, float sigma_range)
{
    vec3 sigma = get_gaussian_sigma(color, sigma_range);
    vec3 inv_sigma = vec3(1.0)/sigma;

    // Exponent factor 1/(2 sigma^2) and amplitude 1/(sigma sqrt(2 pi)).
    vec3 exponent_scale = 0.5 * inv_sigma * inv_sigma;
    vec3 amplitude = inv_sigma/sqrt(2.0*pi);

    if(!(beam_antialias_level > 0.5))
    {
        // Single Gaussian sample.
        return color*exp(-(dist*dist)*exponent_scale)*amplitude;
    }

    // Extra samples one third of a pixel away in each direction.
    vec3 third_pixel = vec3(pixel_height/3.0);
    vec3 dist_far = dist + third_pixel;
    vec3 dist_near = abs(dist - third_pixel);

    // Average the three Gaussian samples.
    vec3 scale = color/3.0 * amplitude;
    vec3 w_center = exp(-(dist*dist)*exponent_scale);
    vec3 w_far = exp(-(dist_far*dist_far)*exponent_scale);
    vec3 w_near = exp(-(dist_near*dist_near)*exponent_scale);

    return scale * (w_center + w_far + w_near);
}
|
||||
|
||||
vec3 scanline_generalized_gaussian_sampled_contrib(vec3 dist, vec3 color, float pixel_height, float sigma_range, float shape_range)
{
    //  Returns:    color weighted by a point-sampled generalized Gaussian
    //              beam profile exp(-(|d|/alpha)**beta), scaled by
    //              beta/(2*alpha*gamma(1/beta)). When beam_antialias_level
    //              > 0.5, three samples (at dist and 1/3 pixel to either
    //              side) are averaged; otherwise one sample is used.
    vec3 alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    vec3 beta = get_generalized_gaussian_beta(color, shape_range);

    //  Reciprocals are reused below, so compute them once:
    vec3 inv_alpha = vec3(1.0)/alpha;
    vec3 inv_beta = vec3(1.0)/beta;
    vec3 peak_scale = color * beta * 0.5 * inv_alpha / gamma_impl(inv_beta, beta);

    //  The sample at dist itself is needed by both paths:
    vec3 center_weight = exp(-pow(abs(dist*inv_alpha), beta));

    if(beam_antialias_level > 0.5)
    {
        //  Two extra samples, 1/3 pixel away in each direction:
        vec3 third_pixel = vec3(pixel_height/3.0);
        vec3 plus_dist = dist + third_pixel;
        vec3 minus_dist = abs(dist - third_pixel);
        vec3 plus_weight = exp(-pow(abs(plus_dist*inv_alpha), beta));
        vec3 minus_weight = exp(-pow(abs(minus_dist*inv_alpha), beta));

        //  Average the three generalized Gaussian samples:
        return peak_scale/3.0 * (center_weight + plus_weight + minus_weight);
    }

    return peak_scale * center_weight;
}
|
||||
|
||||
vec3 scanline_contrib(vec3 dist, vec3 color, float pixel_height, float sigma_range, float shape_range)
{
    //  Returns:    The scanline's contribution at the given distance, using
    //              the beam model chosen by the settings:
    //              beam_generalized_gaussian picks the generalized Gaussian
    //              over the pure Gaussian, and beam_antialias_level > 1.5
    //              picks the integral form over the point-sampled form.
    bool use_integral = beam_antialias_level > 1.5;

    if(beam_generalized_gaussian)
    {
        if(use_integral)
        {
            return scanline_generalized_gaussian_integral_contrib(dist, color, pixel_height, sigma_range, shape_range);
        }
        return scanline_generalized_gaussian_sampled_contrib(dist, color, pixel_height, sigma_range, shape_range);
    }

    if(use_integral)
    {
        return scanline_gaussian_integral_contrib(dist, color, pixel_height, sigma_range);
    }
    return scanline_gaussian_sampled_contrib(dist, color, pixel_height, sigma_range);
}
|
||||
|
||||
|
||||
// 2 - Apply mask only.
|
||||
vec3 get_raw_interpolated_color(vec3 color0, vec3 color1, vec3 color2, vec3 color3, vec4 weights)
{
    //  Returns:    The weighted sum of the four colors (weights.x applies to
    //              color0, ..., weights.w to color3), clamped below at zero.
    //  Pack the colors as matrix columns so one matrix*vector does the sum:
    mat4x3 taps = mat4x3(color0, color1, color2, color3);
    vec3 blended = taps * weights;

    //  Clamp to avoid bizarre artifacts from negative colors:
    return max(blended, 0.0);
}
|
||||
|
||||
vec3 get_interpolated_linear_color(vec3 color0, vec3 color1, vec3 color2, vec3 color3, vec4 weights)
{
    //  Requires:   color0-3 are colors in linear RGB.
    //  Returns:    A blend between two interpolations of the four colors:
    //              one mixed directly in linear RGB, and one mixed after
    //              raising each color to 1/lcd_gamma. The blend factor is
    //              global.beam_horiz_linear_rgb_weight (1.0 selects the
    //              linear-RGB mix, 0.0 the gamma-encoded mix).
    float intermediate_gamma = lcd_gamma;
    vec3 encode_exponent = vec3(1.0/intermediate_gamma);

    vec3 mixed_in_linear = get_raw_interpolated_color(color0, color1, color2, color3, weights);
    vec3 mixed_in_gamma = get_raw_interpolated_color(
        pow(color0, encode_exponent),
        pow(color1, encode_exponent),
        pow(color2, encode_exponent),
        pow(color3, encode_exponent),
        weights);

    //  NOTE(review): the upstream source flagged this line with a FIXME; the
    //  weight is read from the "global" parameter block here.
    return mix(mixed_in_gamma, mixed_in_linear, global.beam_horiz_linear_rgb_weight);
}
|
||||
|
||||
vec3 get_scanline_color(sampler2D tex, vec2 scanline_uv, vec2 uv_step_x, vec4 weights)
{
    //  Returns:    The horizontally interpolated color at scanline_uv.
    //              The two inner taps (at scanline_uv and one step to the
    //              right) are always fetched; the two outer taps are only
    //              fetched when beam_horiz_filter > 0.5 and are black
    //              otherwise.
    vec3 outer_left = vec3(0.0);
    vec3 outer_right = vec3(0.0);
    vec3 inner_left = texture(tex, scanline_uv).rgb;
    vec3 inner_right = texture(tex, scanline_uv + uv_step_x).rgb;

    if(beam_horiz_filter > 0.5)
    {
        outer_left = texture(tex, scanline_uv - uv_step_x).rgb;
        outer_right = texture(tex, scanline_uv + 2.0 * uv_step_x).rgb;
    }

    return get_interpolated_linear_color(outer_left, inner_left, inner_right, outer_right, weights);
}
|
||||
|
||||
vec3 sample_single_scanline_horizontal(sampler2D tex, vec2 tex_uv, vec2 tex_size, vec2 texture_size_inv)
{
    //  Returns:    The color at tex_uv, resampled horizontally from up to
    //              four neighboring texels with the filter selected by
    //              beam_horiz_filter (< 0.5: Quilez, < 1.5: Gaussian,
    //              otherwise Lanczos2). The vertical coordinate is passed
    //              through unchanged.
    vec2 texel_pos = tex_uv * tex_size;

    //  Center of the texel to the left of the sample. Subtracting
    //  under_half fixes a rounding bug right around exact texel locations.
    vec2 left_center = floor(texel_pos - vec2(under_half)) + vec2(0.5);
    vec2 left_tap = vec2(left_center.x, texel_pos.y);
    vec2 left_tap_uv = left_tap * texture_size_inv;

    //  Horizontal distances from the sample to each of the four taps:
    float frac_x = texel_pos.x - left_tap.x;
    vec4 tap_dists = vec4(1.0 + frac_x, frac_x, 1.0 - frac_x, 2.0 - frac_x);

    //  Unnormalized filter weights for the 2 or 4 nearby texels:
    vec4 raw_weights;

    if(beam_horiz_filter < 0.5)
    {
        //  Quilez: quintic smoothstep between the two inner taps only.
        float t = tap_dists.y;
        float right_weight = t*t*t*(t*(t*6.0 - 15.0) + 10.0);
        raw_weights = vec4(0.0, 1.0 - right_weight, right_weight, 0.0);
    }
    else if(beam_horiz_filter < 1.5)
    {
        //  Gaussian with standard deviation beam_horiz_sigma:
        float exponent_scale = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);
        raw_weights = exp(-(tap_dists*tap_dists)*exponent_scale);
    }
    else
    {
        //  Lanczos2: sinc(x)*sinc(x/2); FIX_ZERO guards the division.
        vec4 pi_x = FIX_ZERO(tap_dists * pi);
        raw_weights = 2.0 * sin(pi_x) * sin(pi_x * 0.5)/(pi_x * pi_x);
    }

    //  Normalize so the weights sum to 1.0:
    vec4 unit_weights = raw_weights/dot(raw_weights, vec4(1.0));

    //  Fetch and blend the taps along the horizontal axis:
    vec2 step_right = vec2(texture_size_inv.x, 0.0);

    return get_scanline_color(tex, left_tap_uv, step_right, unit_weights);
}
|
||||
|
||||
vec3 sample_rgb_scanline_horizontal(sampler2D tex, vec2 tex_uv, vec2 tex_size, vec2 texture_size_inv)
{
    //  Returns:    The horizontally resampled scanline color at tex_uv.
    //              This currently forwards straight to
    //              sample_single_scanline_horizontal().
    //  TODO: Add function requirements.
    vec3 scanline_color = sample_single_scanline_horizontal(tex, tex_uv, tex_size, texture_size_inv);
    return scanline_color;
}
|
||||
|
||||
// Monolithic
|
||||
vec2 get_last_scanline_uv(vec2 tex_uv, vec2 tex_size, vec2 texture_size_inv,
    vec2 il_step_multiple, float frame_count, out float dist)
{
    //  Requires:   il_step_multiple.y is the vertical line step between
    //              scanlines of the same field.
    //  Returns:    The uv coordinates of the center of the previous scanline
    //              in the current field. Also writes to dist the sample's
    //              distance from that scanline, in units of scanlines.
    float field_parity = mod(frame_count + float(global.interlace_bff), 2.0);
    float field_offset = floor(il_step_multiple.y * 0.75)*field_parity;
    vec2 texel_pos = tex_uv * tex_size;

    //  Index of the texel at or before the sample. Subtracting under_half
    //  fixes a rounding bug right around exact texel locations.
    vec2 texel_index = floor(texel_pos - vec2(under_half));

    //  Step back by however many lines separate this texel's row from the
    //  nearest earlier row belonging to the current field:
    float lines_past_field = mod(texel_index.y + field_offset, il_step_multiple.y);
    vec2 scanline_index = texel_index - vec2(0.0, lines_past_field);

    //  Snap to the texel center of that scanline:
    vec2 scanline_center = scanline_index + vec2(0.5);

    //  Report the sample's distance from the scanline, in scanlines:
    dist = (texel_pos.y - scanline_center.y)/il_step_multiple.y;

    return scanline_center * texture_size_inv;
}
|
||||
|
||||
bool is_interlaced(float num_lines)
{
    //  Requires:   num_lines is the number of lines in the source.
    //  Returns:    true if the source is presumed interlaced based on its
    //              line count; always false when interlace_detect is off.
    //  Background for the cutoffs:
    //      NTSC: 525 lines, 262.5/field; 486 active (2 half-lines), 243/field
    //      NTSC Emulators: Typically 224 or 240 lines
    //      PAL: 625 lines, 312.5/field; 576 active (typical), 288/field
    //      PAL Emulators: ?
    //      ATSC: 720p, 1080i, 1080p
    //  Assumptions:
    //      1.) Only active lines matter.
    //      2.) Anything > 288 and <= 576 lines is probably interlaced.
    //      3.) Anything > 576 lines is probably not interlaced...
    //      4.) ...except 1080 lines, which is a crapshoot (user decision
    //          via interlace_1080i).
    //      5.) The thresholds are nudged by half a line in case the main
    //          program passes calculated (inexact) video sizes.
    if(interlace_detect)
    {
        bool in_sd_range = (num_lines > 288.5) && (num_lines < 576.5);
        bool is_1080_lines = (num_lines > 1079.5) && (num_lines < 1080.5);
        bool in_hd_range = bool(interlace_1080i) && is_1080_lines;

        return (in_sd_range || in_hd_range);
    }

    return false;
}
|
||||
|
||||
|
||||
#endif // SCANLINE_FUNCTIONS_H
|
||||
|
182
crt/shaders/crt-royale/src-fast/special-functions.h
Normal file
182
crt/shaders/crt-royale/src-fast/special-functions.h
Normal file
@ -0,0 +1,182 @@
|
||||
#ifndef SPECIAL_FUNCTIONS_H
|
||||
#define SPECIAL_FUNCTIONS_H
|
||||
|
||||
///////////////////////////////// MIT LICENSE ////////////////////////////////
|
||||
|
||||
// Copyright (C) 2014 TroggleMonkey
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal in the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
// IN THE SOFTWARE.
|
||||
|
||||
|
||||
/////////////////////////// GAUSSIAN ERROR FUNCTION //////////////////////////
|
||||
|
||||
vec3 erf6(vec3 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    An Abramowitz/Stegun approximation of erf(), where:
    //                  erf(x) = 2/sqrt(pi) * integral(e**(-x**2))
    //              The approximation has a max absolute error of 2.5*10**-5
    //              with solid numerical robustness and efficiency. See:
    //                  https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
    vec3 unit = vec3(1.0);
    vec3 t = unit/(unit + 0.47047*abs(x));

    //  Cubic in t (Horner form, with the outermost t factored out below):
    vec3 poly = 0.3480242 + t*(-0.0958798 + t*0.7478556);
    vec3 gaussian = exp(-(x*x));

    //  The formula is evaluated on |x|; erf is odd, so restore the sign last.
    vec3 magnitude = unit - t*poly*gaussian;
    return magnitude * sign(x);
}
|
||||
|
||||
vec3 erft(vec3 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    erf() approximated with the hyperbolic tangent. The error
    //              is visually noticeable, but it's blazing fast and
    //              perceptually close...at least on ATI hardware. See:
    //                  http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html
    //  Warning:    Only use this if your hardware drivers correctly implement
    //              tanh(): The original author's nVidia 8800GTS returned
    //              garbage output.
    vec3 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
|
||||
|
||||
vec3 erf(vec3 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    An approximation of erf(x) chosen at compile time:
    //              erft() (tanh-based) if ERF_FAST_APPROXIMATION is defined,
    //              otherwise erf6() (Abramowitz/Stegun).
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
|
||||
|
||||
/////////////////////////// COMPLETE GAMMA FUNCTION //////////////////////////
|
||||
|
||||
|
||||
vec3 gamma_impl(vec3 s, vec3 s_inv)
{
    //  Requires:   1.) s is the standard parameter to the gamma function,
    //                  and it should lie in the [0, 36] range.
    //              2.) s_inv = 1.0/s. The caller precomputes this value so
    //                  it can be reused elsewhere.
    //  Returns:    The approximate gamma function (real-numbered factorial)
    //              of s, using the Lanczos approximation with two
    //              coefficients calculated with Paul Godfrey's method:
    //                  http://my.fit.edu/~gabdo/gamma.txt
    //              Per the original author, g ~= 1.12906830989 is optimal
    //              for s in [0, 36], with a maximum relative error of
    //              0.000463 measured over 2**16 evaluations. Three
    //              coefficients would cut the error to 0.0000346 without
    //              hurting latency, but two allow more parallelism with
    //              outside instructions.
    vec3 lanczos_g = vec3(1.12906830989);
    vec3 coeff0 = vec3(0.8109119309638332633713423362694399653724431);
    vec3 coeff1 = vec3(0.4808354605142681877121661197951496120000040);
    vec3 euler_e = vec3(2.71828182845904523536028747135266249775724709);

    vec3 s_plus_half = s + vec3(0.5);
    vec3 series = coeff0 + coeff1/(s + vec3(1.0));
    vec3 power_base = (s_plus_half + lanczos_g)/euler_e;

    //  The expression before the final multiply is evaluated with s + 1 in
    //  the series; multiplying by 1/s steps the result back down to s.
    return (pow(power_base, s_plus_half) * series) * s_inv;
}
|
||||
|
||||
|
||||
//////////////// INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT) ///////////////
|
||||
|
||||
// Lower incomplete gamma function for small s and z (implementation):
|
||||
vec3 ligamma_small_z_impl(vec3 s, vec3 z, vec3 s_inv)
{
    //  Requires:   1.) s < ~0.5
    //              2.) z <= ~0.775075
    //              3.) s_inv = 1.0/s (precomputed for outside reuse)
    //  Returns:    A four-term series representation of the lower incomplete
    //              gamma function for small s and small z:
    //                  z**s * (1/s - z/(s + 1) + z**2/(2*(s + 2))
    //                          - z**3/(6*(s + 3)))
    //              i.e. the first four terms of
    //                  z**s * sum_k((-z)**k / (k! * (s + k)))
    //  The k! factors are folded into the denominators as constants:
    vec3 z_squared = z*z;
    vec3 term1_denom = s + vec3(1.0);
    vec3 term2_denom = 2.0*s + vec3(4.0);
    vec3 term3_denom = 6.0*s + vec3(18.0);

    vec3 series = s_inv - z/term1_denom + z_squared/term2_denom - z * z_squared/term3_denom;

    return pow(z, s) * series;
}
|
||||
|
||||
// Upper incomplete gamma function for small s and large z (implementation):
|
||||
vec3 uigamma_large_z_impl(vec3 s, vec3 z)
{
    //  Requires:   1.) s < ~0.5
    //              2.) z > ~0.775075
    //  Returns:    Gauss's continued fraction representation of the upper
    //              incomplete gamma function, truncated to four levels:
    //                  z**s * e**(-z) / denominator
    //              Level i of the denominator is
    //                  (2*i - 1) + z - s + i*(s - i)/level(i + 1)
    //              with the fraction below the fourth level dropped. It is
    //              evaluated from the innermost level outward.
    vec3 level4 = vec3(7.0) + z - s;
    vec3 level3 = vec3(5.0) + z - s + (3.0*s - vec3(9.0))/level4;
    vec3 level2 = vec3(3.0) + z - s + (2.0*s - vec3(4.0))/level3;
    vec3 level1 = vec3(1.0) + z - s + (s - vec3(1.0))/level2;

    vec3 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
|
||||
|
||||
|
||||
// Normalized lower incomplete gamma function for small s (implementation):
|
||||
vec3 normalized_ligamma_impl(vec3 s, vec3 z,
    vec3 s_inv, vec3 gamma_s_inv)
{
    //  Requires:   1.) s < ~0.5
    //              2.) s_inv = 1/s (precomputed for outside reuse)
    //              3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse)
    //  Returns:    The approximate normalized lower incomplete gamma function
    //              for s < 0.5. With s restricted this way, only two
    //              branches (not four) need evaluating, selected by z. Each
    //              branch uses four terms, with a max relative error of
    //              ~0.00182. The branch threshold and specifics were adapted
    //              for fewer terms from Gil/Segura/Temme's paper here:
    //                  http://oai.cwi.nl/oai/asset/20433/20433B.pdf
    //  Both branches are always evaluated and then combined with 0/1 masks:
    //  real branches test slower even when available.
    bvec3 use_continued_fraction = greaterThan(z, vec3(0.775075));
    vec3 fraction_mask = vec3(use_continued_fraction);
    vec3 series_mask = vec3(not(use_continued_fraction));

    //  z > threshold: 1 - (upper incomplete gamma)/gamma(s)
    vec3 from_fraction = vec3(1.0) - uigamma_large_z_impl(s, z) * gamma_s_inv;
    //  z <= threshold: (lower incomplete gamma series)/gamma(s)
    vec3 from_series = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;

    return from_fraction * fraction_mask + from_series * series_mask;
}
|
||||
|
||||
#endif // SPECIAL_FUNCTIONS_H
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user