Add proper support for upscaling shaders, add Spline36 upscaling

Spline36 isn't as amazing as I had hoped, heh, and it will need more work
before it runs on GLSL ES. Anyway...

This also renames u_texcoordDelta to u_texelDelta to fit in with u_pixelDelta.
This commit is contained in:
Henrik Rydgard 2013-10-22 12:17:40 +02:00
parent 492fcb261b
commit 7d8aed096a
12 changed files with 227 additions and 52 deletions

View File

@ -40,6 +40,7 @@ void LoadPostShaderInfo(std::vector<std::string> directories) {
ShaderInfo off;
off.name = "Off";
off.section = "Off";
off.outputResolution = false;
shaderInfo.push_back(off);
for (size_t d = 0; d < directories.size(); d++) {
@ -81,6 +82,7 @@ void LoadPostShaderInfo(std::vector<std::string> directories) {
info.fragmentShaderFile = path + "/" + temp;
section.Get("Vertex", &temp, "");
info.vertexShaderFile = path + "/" + temp;
section.Get("OutputResolution", &info.outputResolution, false);
shaderInfo.erase(std::find(shaderInfo.begin(), shaderInfo.end(), info.name), shaderInfo.end());
shaderInfo.push_back(info);
}

View File

@ -32,6 +32,9 @@ struct ShaderInfo {
std::string fragmentShaderFile;
std::string vertexShaderFile;
// Run at output instead of input resolution
bool outputResolution;
// TODO: Add support for all kinds of fun options like mapping the depth buffer,
// SRGB texture reads, multiple shaders chained, etc.

View File

@ -135,14 +135,14 @@ void CenterRect(float *x, float *y, float *w, float *h,
}
}
void ClearBuffer() {
static void ClearBuffer() {
glstate.depthWrite.set(GL_TRUE);
glstate.colorMask.set(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClearColor(0,0,0,1);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
}
void DisableState() {
static void DisableState() {
glstate.blend.disable();
glstate.cullFace.disable();
glstate.depthTest.disable();
@ -184,6 +184,7 @@ void FramebufferManager::CompileDraw2DProgram() {
}
if (shaderInfo) {
postShaderAtOutputResolution_ = shaderInfo->outputResolution;
postShaderProgram_ = glsl_create(shaderInfo->vertexShaderFile.c_str(), shaderInfo->fragmentShaderFile.c_str(), &errorString);
if (!postShaderProgram_) {
// DO NOT turn this into a report, as it will pollute our logs with all kinds of
@ -204,9 +205,21 @@ void FramebufferManager::CompileDraw2DProgram() {
SetNumExtraFBOs(1);
float u_delta = 1.0f / PSP_CoreParameter().renderWidth;
float v_delta = 1.0f / PSP_CoreParameter().renderHeight;
int deltaLoc = glsl_uniform_loc(postShaderProgram_, "u_texcoordDelta");
float u_pixel_delta = u_delta;
float v_pixel_delta = v_delta;
if (postShaderAtOutputResolution_) {
float x, y, w, h;
CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);
u_pixel_delta = 1.0f / w;
v_pixel_delta = 1.0f / h;
}
int deltaLoc = glsl_uniform_loc(postShaderProgram_, "u_texelDelta");
if (deltaLoc != -1)
glUniform2f(deltaLoc, u_delta, v_delta);
int pixelDeltaLoc = glsl_uniform_loc(postShaderProgram_, "u_pixelDelta");
if (pixelDeltaLoc != -1)
glUniform2f(pixelDeltaLoc, u_pixel_delta, v_pixel_delta);
usePostShader_ = true;
}
} else {
@ -370,16 +383,37 @@ void FramebufferManager::DrawPixels(const u8 *framebuf, GEBufferFormat pixelForm
CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);
glBindTexture(GL_TEXTURE_2D,drawPixelsTex_);
if (g_Config.iTexFiltering == LINEAR || (g_Config.iTexFiltering == LINEARFMV && g_iNumVideos))
{
if (g_Config.iTexFiltering == LINEAR || (g_Config.iTexFiltering == LINEARFMV && g_iNumVideos)) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
}
glTexSubImage2D(GL_TEXTURE_2D,0,0,0,512,272, GL_RGBA, GL_UNSIGNED_BYTE, pixelFormat == GE_FORMAT_8888 ? framebuf : convBuf);
DrawActiveTexture(x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, 480.0f / 512.0f);
// This draws directly at the backbuffer so if there's a post shader, we need to apply it here. Should try to unify this path
// with the regular path somehow, but this simple solution works for most of the post shaders (it always runs at output resolution so FXAA may look odd).
if (usePostShader_) {
DrawActiveTexture(0, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, 480.0f / 512.0f, 1.0f, postShaderProgram_);
} else {
DrawActiveTexture(0, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, false, 480.0f / 512.0f);
}
}
void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, bool flip, float uscale, float vscale, GLSLProgram *program) {
void FramebufferManager::DrawActiveTexture(GLuint texture, float x, float y, float w, float h, float destW, float destH, bool flip, float uscale, float vscale, GLSLProgram *program) {
if (texture) {
// We know the texture, we can do a DrawTexture shortcut on nvidia.
#if defined(USING_GLES2) && !defined(__SYMBIAN32__) && !defined(MEEGO_EDITION_HARMATTAN) && !defined(IOS)
if (gl_extensions.NV_draw_texture) {
// Fast path for Tegra. TODO: Make this path work on desktop nvidia, seems glew doesn't have a clue.
// Actually, on Desktop we should just use glBlitFramebuffer.
glDrawTextureNV(texture, 0,
x, y, w, h, 0.0f,
0, 0, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height);
return;
}
#endif
glBindTexture(GL_TEXTURE_2D, texture);
}
float u2 = uscale;
// Since we're flipping, 0 is down. That's where the scale goes.
float v1 = flip ? 1.0f : 1.0f - vscale;
@ -770,7 +804,7 @@ void FramebufferManager::CopyDisplayToOutput() {
}
if (!vfb) {
// Just a pointer to plain memory to draw. Draw it. And make sure to set the viewport...
// Just a pointer to plain memory to draw. Draw it.
DrawPixels(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_);
return;
}
@ -806,44 +840,40 @@ void FramebufferManager::CopyDisplayToOutput() {
GLuint colorTexture = fbo_get_color_texture(vfb->fbo);
// Output coordinates
float x, y, w, h;
CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);
// TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame.
// and to discard extraFBOs_ after using them.
if (usePostShader_ && extraFBOs_.size() == 1) {
glBindTexture(GL_TEXTURE_2D, colorTexture);
if (!usePostShader_) {
glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
// These are in the output display coordinates
DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height);
} else if (usePostShader_ && extraFBOs_.size() == 1 && !postShaderAtOutputResolution_) {
// An additional pass, FXAA to the extra FBO.
fbo_bind_as_render_target(extraFBOs_[0]);
int fbo_w, fbo_h;
fbo_get_dimensions(extraFBOs_[0], &fbo_w, &fbo_h);
glstate.viewport.set(0, 0, fbo_w, fbo_h);
DrawActiveTexture(0, 0, fbo_w, fbo_h, fbo_w, fbo_h, true, 1.0f, 1.0f, postShaderProgram_);
DrawActiveTexture(colorTexture, 0, 0, fbo_w, fbo_h, fbo_w, fbo_h, true, 1.0f, 1.0f, postShaderProgram_);
fbo_unbind();
// Use the extra FBO, with applied FXAA, as a texture.
// fbo_bind_color_as_texture(extraFBOs_[0], 0);
colorTexture = fbo_get_color_texture(extraFBOs_[0]);
glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
// These are in the output display coordinates
DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height);
} else {
// Use post-shader, but run shader at output resolution.
glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
// These are in the output display coordinates
DrawActiveTexture(colorTexture, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height, postShaderProgram_);
}
glstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
// These are in the output display coordinates
float x, y, w, h;
CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);
#if defined(USING_GLES2) && !defined(__SYMBIAN32__) && !defined(MEEGO_EDITION_HARMATTAN) && !defined(IOS)
if (gl_extensions.NV_draw_texture) {
// Fast path for Tegra. TODO: Make this path work on desktop nvidia, seems glew doesn't have a clue.
// Actually, on Desktop we should just use glBlitFramebuffer.
glDrawTextureNV(colorTexture, 0,
x, y, w, h, 0.0f,
0, 0, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height);
return;
}
#endif
glBindTexture(GL_TEXTURE_2D, colorTexture);
DrawActiveTexture(x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, true, 480.0f / (float)vfb->width, 272.0f / (float)vfb->height);
glBindTexture(GL_TEXTURE_2D, 0);
}
}
@ -990,7 +1020,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFrameb
CompileDraw2DProgram();
DrawActiveTexture(x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, flip, upscale, vscale, draw2dprogram_);
DrawActiveTexture(0, x, y, w, h, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, flip, upscale, vscale, draw2dprogram_);
glBindTexture(GL_TEXTURE_2D, 0);
fbo_unbind();

View File

@ -115,7 +115,9 @@ public:
}
void DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize);
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, bool flip = false, float uscale = 1.0f, float vscale = 1.0f, GLSLProgram *program = 0);
// If texture != 0, will bind it.
void DrawActiveTexture(GLuint texture, float x, float y, float w, float h, float destW, float destH, bool flip = false, float uscale = 1.0f, float vscale = 1.0f, GLSLProgram *program = 0);
void DestroyAllFBOs();
void DecimateFBOs();
@ -202,6 +204,7 @@ private:
TextureCache *textureCache_;
ShaderManager *shaderManager_;
bool usePostShader_;
bool postShaderAtOutputResolution_;
// Used by antialiasing
std::vector<FBO *> extraFBOs_;

View File

@ -1,7 +1,7 @@
attribute vec4 a_position;
attribute vec2 a_texcoord0;
uniform mat4 u_viewproj;
uniform vec2 u_texcoordDelta;
uniform vec2 u_texelDelta;
varying vec4 v_texcoord0;
varying vec4 v_texcoord1;
@ -15,8 +15,8 @@ float scaleoffset = 0.8;
void main()
{
float x = u_texcoordDelta.x*scaleoffset;
float y = u_texcoordDelta.y*scaleoffset;
float x = u_texelDelta.x*scaleoffset;
float y = u_texelDelta.y*scaleoffset;
vec2 dg1 = vec2( x,y);
vec2 dg2 = vec2(-x,y);
vec2 sd1 = dg1*0.5;

View File

@ -4,7 +4,7 @@
attribute vec4 a_position;
attribute vec2 a_texcoord0;
uniform mat4 u_viewproj;
uniform vec2 u_texcoordDelta;
uniform vec2 u_texelDelta;
varying vec4 v_texcoord0;
varying vec4 v_texcoord1;
@ -19,8 +19,8 @@ float scaleoffset = 0.8;
void main()
{
float x = u_texcoordDelta.x*scaleoffset;
float y = u_texcoordDelta.y*scaleoffset;
float x = u_texelDelta.x*scaleoffset;
float y = u_texelDelta.y*scaleoffset;
gl_Position = u_viewproj * a_position;
v_texcoord0 = a_texcoord0.xyxy;
v_texcoord1 = v_texcoord0;

View File

@ -1,7 +1,7 @@
attribute vec4 a_position;
attribute vec2 a_texcoord0;
uniform mat4 u_viewproj;
uniform vec2 u_texcoordDelta;
uniform vec2 u_texelDelta;
varying vec4 v_texcoord0;
varying vec4 v_texcoord1;
@ -15,8 +15,8 @@ float scaleoffset = 0.8; //edge detection offset
void main()
{
float x = u_texcoordDelta.x*scaleoffset;
float y = u_texcoordDelta.y*scaleoffset;
float x = u_texelDelta.x*scaleoffset;
float y = u_texelDelta.y*scaleoffset;
vec2 dg1 = vec2( x,y);
vec2 dg2 = vec2(-x,y);
vec2 dx = vec2(x,0.0);

View File

@ -39,3 +39,8 @@ Vertex=4xhqglsl.vsh
Name=AA-Color
Fragment=aacolor.fsh
Vertex=aacolor.vsh
[UpscaleSpline36]
Name=Spline36 Upscaler
Fragment=upscale_spline36.fsh
Vertex=upscale_spline36.vsh
OutputResolution=True

View File

@ -13,7 +13,7 @@ precision mediump int;
uniform sampler2D sampler0;
// The inverse of the texture dimensions along X and Y
uniform vec2 u_texcoordDelta;
uniform vec2 u_texelDelta;
varying vec2 v_texcoord0;
void main() {
@ -23,10 +23,10 @@ void main() {
float FXAA_REDUCE_MUL = 1.0/8.0;
float FXAA_REDUCE_MIN = (1.0/128.0);
vec3 rgbNW = texture2D(sampler0, v_texcoord0.xy + (vec2(-1.0, -1.0) * u_texcoordDelta)).xyz;
vec3 rgbNE = texture2D(sampler0, v_texcoord0.xy + (vec2(+1.0, -1.0) * u_texcoordDelta)).xyz;
vec3 rgbSW = texture2D(sampler0, v_texcoord0.xy + (vec2(-1.0, +1.0) * u_texcoordDelta)).xyz;
vec3 rgbSE = texture2D(sampler0, v_texcoord0.xy + (vec2(+1.0, +1.0) * u_texcoordDelta)).xyz;
vec3 rgbNW = texture2D(sampler0, v_texcoord0.xy + (vec2(-1.0, -1.0) * u_texelDelta)).xyz;
vec3 rgbNE = texture2D(sampler0, v_texcoord0.xy + (vec2(+1.0, -1.0) * u_texelDelta)).xyz;
vec3 rgbSW = texture2D(sampler0, v_texcoord0.xy + (vec2(-1.0, +1.0) * u_texelDelta)).xyz;
vec3 rgbSE = texture2D(sampler0, v_texcoord0.xy + (vec2(+1.0, +1.0) * u_texelDelta)).xyz;
vec3 rgbM = texture2D(sampler0, v_texcoord0.xy).xyz;
vec3 luma = vec3(0.299, 0.587, 0.114);
@ -48,7 +48,7 @@ void main() {
float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce);
dir = min(vec2(FXAA_SPAN_MAX, FXAA_SPAN_MAX),
max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), dir * rcpDirMin)) * u_texcoordDelta;
max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), dir * rcpDirMin)) * u_texelDelta;
vec3 rgbA = (1.0/2.0) * (
texture2D(sampler0, v_texcoord0.xy + dir * (1.0/3.0 - 0.5)).xyz +

View File

@ -1,4 +1,4 @@
uniform vec4 u_texcoordDelta;
uniform vec4 u_texelDelta;
attribute vec4 a_position;
attribute vec2 a_texcoord0;
@ -13,8 +13,8 @@ void main()
{
gl_Position=u_viewproj * a_position;
v_texcoord0=a_texcoord0.xyxy+vec4(-0.5,-0.5,-1.5,-1.5)*u_texcoordDelta.xyxy;
v_texcoord1=a_texcoord0.xyxy+vec4( 0.5,-0.5, 1.5,-1.5)*u_texcoordDelta.xyxy;
v_texcoord2=a_texcoord0.xyxy+vec4(-0.5, 0.5,-1.5, 1.5)*u_texcoordDelta.xyxy;
v_texcoord3=a_texcoord0.xyxy+vec4( 0.5, 0.5, 1.5, 1.5)*u_texcoordDelta.xyxy;
v_texcoord0=a_texcoord0.xyxy+vec4(-0.5,-0.5,-1.5,-1.5)*u_texelDelta.xyxy;
v_texcoord1=a_texcoord0.xyxy+vec4( 0.5,-0.5, 1.5,-1.5)*u_texelDelta.xyxy;
v_texcoord2=a_texcoord0.xyxy+vec4(-0.5, 0.5,-1.5, 1.5)*u_texelDelta.xyxy;
v_texcoord3=a_texcoord0.xyxy+vec4( 0.5, 0.5, 1.5, 1.5)*u_texelDelta.xyxy;
}

View File

@ -0,0 +1,120 @@
// Spline36 upscaling shader.
// See issue #3921
#ifdef GL_ES
precision mediump float;
precision mediump int;
#endif
uniform sampler2D sampler0;
varying vec2 v_position;
uniform vec2 u_texelDelta;
uniform vec2 u_pixelDelta;
const vec2 HALF_PIXEL = vec2(0.5, 0.5);
// Spline36 weight for the segment named 0..1 (callers pass the distance to the
// two taps nearest the sample point). Evaluates
//   (13/11)x^3 - (453/209)x^2 - (3/209)x + 1
// in Horner form; the polynomial is 1 at x = 0 and 0 at x = 1.
float spline36_0_1(float x) {
	float acc = 13.0 / 11.0 * x - 453.0 / 209.0;
	acc = acc * x - 3.0 / 209.0;
	return acc * x + 1.0;
}
// Spline36 weight for the segment named 1..2 (the second-nearest taps on each
// side). Evaluates
//   (-6/11)x^3 + (612/209)x^2 - (1038/209)x + 540/209
// in Horner form; the polynomial is 0 at both x = 1 and x = 2.
float spline36_1_2(float x) {
	float acc = -6.0 / 11.0 * x + 612.0 / 209.0;
	acc = acc * x - 1038.0 / 209.0;
	return acc * x + 540.0 / 209.0;
}
// Spline36 weight for the segment named 2..3 (the outermost taps on each side).
// Evaluates
//   (1/11)x^3 - (159/209)x^2 + (434/209)x - 384/209
// in Horner form; the polynomial is 0 at both x = 2 and x = 3.
float spline36_2_3(float x) {
	float acc = 1.0 / 11.0 * x - 159.0 / 209.0;
	acc = acc * x + 434.0 / 209.0;
	return acc * x - 384.0 / 209.0;
}
// Fetches the source texel at integer coordinates (inputX, inputY).
// The half-pixel offset moves the lookup to the texel's center before the
// coordinate is scaled by u_texelDelta into texture space.
vec4 rgb(int inputX, int inputY) {
	vec2 texelCenter = vec2(inputX, inputY) + HALF_PIXEL;
	return texture2D(sampler0, texelCenter * u_texelDelta);
}
// Filters one row of source texels horizontally with the six-tap Spline36 kernel.
//   inputPos      - sample position in source-texel units
//   inputPosFloor - inputPos converted to integer texel coordinates
//   dy            - row offset relative to inputPosFloor.y
// Returns the weighted sum of the six taps divided by the sum of the weights.
vec4 interpolateHorizontally(vec2 inputPos, ivec2 inputPosFloor, int dy) {
	int col = inputPosFloor.x;
	int row = inputPosFloor.y + dy;

	float weightTotal = 0.0;
	vec4 colorTotal = vec4(0.0);
	float w;

	// Distance from the sample point to the leftmost tap (col - 2); it shrinks
	// by one for every tap we step to the right.
	float dist = inputPos.x - float(col - 2);
	w = spline36_2_3(dist);
	weightTotal += w;
	colorTotal += w * rgb(col - 2, row);

	dist -= 1.0;
	w = spline36_1_2(dist);
	weightTotal += w;
	colorTotal += w * rgb(col - 1, row);

	dist -= 1.0;
	w = spline36_0_1(dist);
	weightTotal += w;
	colorTotal += w * rgb(col, row);

	// Past the sample point: measure the distance to the taps on the right
	// instead, which now grows by one per tap.
	dist = 1.0 - dist;
	w = spline36_0_1(dist);
	weightTotal += w;
	colorTotal += w * rgb(col + 1, row);

	dist += 1.0;
	w = spline36_1_2(dist);
	weightTotal += w;
	colorTotal += w * rgb(col + 2, row);

	dist += 1.0;
	w = spline36_2_3(dist);
	weightTotal += w;
	colorTotal += w * rgb(col + 3, row);

	return colorTotal / weightTotal;
}
// Computes the upscaled color for one output fragment.
//   outputPos - the interpolated coordinate passed from the vertex shader
// Six horizontally-filtered rows (offsets -2..+3) are blended vertically with
// the Spline36 weights. The result is normalized by the weight sum and returned
// with alpha forced to 1.0.
vec4 process(vec2 outputPos) {
	// Dividing by the per-texel step converts the coordinate to source-texel
	// units; the ivec2 conversion yields the integer texel the taps surround.
	vec2 inputPos = outputPos / u_texelDelta;
	ivec2 inputPosFloor = ivec2(inputPos);

	// Vertical interpolation
	float weightTotal = 0.0;
	vec4 colorTotal = vec4(0.0);
	float w;

	// Distance from the sample point to the first row (-2); it shrinks by one
	// for every row we step forward.
	float dist = inputPos.y - float(inputPosFloor.y - 2);
	w = spline36_2_3(dist);
	weightTotal += w;
	colorTotal += w * interpolateHorizontally(inputPos, inputPosFloor, -2);

	dist -= 1.0;
	w = spline36_1_2(dist);
	weightTotal += w;
	colorTotal += w * interpolateHorizontally(inputPos, inputPosFloor, -1);

	dist -= 1.0;
	w = spline36_0_1(dist);
	weightTotal += w;
	colorTotal += w * interpolateHorizontally(inputPos, inputPosFloor, 0);

	// Past the sample point: measure the distance to the following rows
	// instead, which now grows by one per row.
	dist = 1.0 - dist;
	w = spline36_0_1(dist);
	weightTotal += w;
	colorTotal += w * interpolateHorizontally(inputPos, inputPosFloor, 1);

	dist += 1.0;
	w = spline36_1_2(dist);
	weightTotal += w;
	colorTotal += w * interpolateHorizontally(inputPos, inputPosFloor, 2);

	dist += 1.0;
	w = spline36_2_3(dist);
	weightTotal += w;
	colorTotal += w * interpolateHorizontally(inputPos, inputPosFloor, 3);

	vec4 blended = colorTotal / weightTotal;
	return vec4(blended.xyz, 1.0);
}
// Fragment entry point: writes the Spline36-filtered color for this fragment.
void main()
{
	gl_FragColor = process(v_position);
}

View File

@ -0,0 +1,12 @@
attribute vec4 a_position;
attribute vec2 a_texcoord0;
uniform mat4 u_viewproj;
varying vec2 v_position;
// Vertex entry point: transforms the vertex by u_viewproj and forwards the
// texture coordinate unchanged as v_position.
void main()
{
	v_position = a_texcoord0;
	gl_Position = u_viewproj * a_position;
}