ppsspp/GPU/GLES/ShaderManagerGLES.cpp
Henrik Rydgård 9d1355e137 Always do the vertex shader part of the fog computation.
In #16104, we drastically reduced the number of shader variants for
games that use flexible lighting setups. I looked at a few games and it
seems that a lot of games have the same shaders with fog on/off, while
fog is super cheap to compute. So let's just always do it, reducing
vertex shader variants further (though the amount of pipelines will probably
remain the same, since we still specialize the fragment shader).

Might also be worth adding a dynamic bool for the fragment shader, but
if so, doing it separately.
2022-09-26 09:30:54 +02:00

1135 lines
38 KiB
C++

// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#if defined(_WIN32) && defined(SHADERLOG)
#include "Common/CommonWindows.h"
#endif
#include <cmath>
#include <cstdio>
#include <map>
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/GPU/OpenGL/GLDebugLog.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/Data/Text/I18n.h"
#include "Common/Math/math_util.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Common/Profiler/Profiler.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/thin3d.h"
#include "Common/GPU/OpenGL/GLRenderManager.h"
#include "Common/System/Display.h"
#include "Common/VR/PPSSPPVR.h"
#include "Common/Log.h"
#include "Common/File/FileUtil.h"
#include "Common/TimeUtil.h"
#include "Core/Config.h"
#include "Core/Host.h"
#include "Core/Reporting.h"
#include "Core/System.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/GLES/DrawEngineGLES.h"
#include "GPU/GLES/FramebufferManagerGLES.h"
using namespace Lin;
// Wraps one shader stage: remembers the source text and the masks from params,
// then asks the render manager to create the GL shader object from that source.
Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, const ShaderDescGLES &params)
	: render_(render),
	  useHWTransform_(params.useHWTransform),
	  attrMask_(params.attrMask),
	  uniformMask_(params.uniformMask) {
	PROFILE_THIS_SCOPE("shadercomp");

	const auto stage = params.glShaderType;
	isFragment_ = (stage == GL_FRAGMENT_SHADER);
	source_ = code;

	// Optional dump of the generated source, for shader debugging builds only.
#if defined(SHADERLOG)
#if defined(_WIN32)
	OutputDebugStringUTF8(code);
#else
	printf("%s\n", code);
#endif
#endif

	shader = render->CreateShader(stage, source_, desc);
}
// Hands the GL shader object back to the render manager for deletion.
Shader::~Shader()
{
	render_->DeleteShader(shader);
}
// Builds a linked program out of a vertex and a fragment shader.
// Collects the attribute semantics, the uniform location queries, the sampler
// initializers and the program flags, then passes them all to
// GLRenderManager::CreateProgram. All uniforms start out dirty.
LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, FShaderID FSID, Shader *fs, bool useHWTransform, bool preloading)
	: render_(render), useHWTransform_(useHWTransform) {
	PROFILE_THIS_SCOPE("shaderlink");

	vs_ = vs;

	std::vector<GLRShader *> stages{ vs->shader, fs->shader };

	// The ATTR_NORMAL slot is bound to "normal" for hardware transform and to "fog" otherwise.
	std::vector<GLRProgram::Semantic> semantics{
		{ ATTR_POSITION, "position" },
		{ ATTR_TEXCOORD, "texcoord" },
		{ ATTR_NORMAL, useHWTransform_ ? "normal" : "fog" },
		{ ATTR_W1, "w1" },
		{ ATTR_W2, "w2" },
		{ ATTR_COLOR0, "color0" },
		{ ATTR_COLOR1, "color1" },
	};

	std::vector<GLRProgram::UniformLocQuery> queries;
	const auto query = [&queries](GLint *dest, const char *name) {
		queries.push_back({ dest, name });
	};

	query(&u_tex, "tex");
	query(&u_pal, "pal");
	query(&u_testtex, "testtex");
	query(&u_fbotex, "fbotex");

	query(&u_proj, "u_proj");
	query(&u_proj_lens, "u_proj_lens");
	query(&u_proj_through, "u_proj_through");
	query(&u_texenv, "u_texenv");
	query(&u_fogcolor, "u_fogcolor");
	query(&u_fogcoef, "u_fogcoef");
	query(&u_alphacolorref, "u_alphacolorref");
	query(&u_alphacolormask, "u_alphacolormask");
	query(&u_colorWriteMask, "u_colorWriteMask");
	query(&u_stencilReplaceValue, "u_stencilReplaceValue");
	query(&u_blendFixA, "u_blendFixA");
	query(&u_blendFixB, "u_blendFixB");
	query(&u_fbotexSize, "u_fbotexSize");

	// Transform
	query(&u_view, "u_view");
	query(&u_world, "u_world");
	query(&u_texmtx, "u_texmtx");

	numBones = VSID.Bit(VS_BIT_ENABLE_BONES) ? TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1) : 0;

	query(&u_depthRange, "u_depthRange");
	query(&u_cullRangeMin, "u_cullRangeMin");
	query(&u_cullRangeMax, "u_cullRangeMax");
	query(&u_rotation, "u_rotation");

	if (IsVRBuild()) {
		query(&u_scaleX, "u_scaleX");
		query(&u_scaleY, "u_scaleY");
	}

#ifdef USE_BONE_ARRAY
	query(&u_bone, "u_bone");
#else
	static const char * const boneNames[8] = { "u_bone0", "u_bone1", "u_bone2", "u_bone3", "u_bone4", "u_bone5", "u_bone6", "u_bone7", };
	for (int bone = 0; bone < 8; ++bone) {
		query(&u_bone[bone], boneNames[bone]);
	}
#endif

	// Lighting, texturing
	query(&u_ambient, "u_ambient");
	query(&u_matambientalpha, "u_matambientalpha");
	query(&u_matdiffuse, "u_matdiffuse");
	query(&u_matspecular, "u_matspecular");
	query(&u_matemissive, "u_matemissive");
	query(&u_uvscaleoffset, "u_uvscaleoffset");
	query(&u_texclamp, "u_texclamp");
	query(&u_texclampoff, "u_texclampoff");
	query(&u_lightControl, "u_lightControl");

	// Per-light uniform names, indexed by light number.
	static const char * const lightPosNames[4] = { "u_lightpos0", "u_lightpos1", "u_lightpos2", "u_lightpos3", };
	static const char * const lightDirNames[4] = { "u_lightdir0", "u_lightdir1", "u_lightdir2", "u_lightdir3", };
	static const char * const lightAttNames[4] = { "u_lightatt0", "u_lightatt1", "u_lightatt2", "u_lightatt3", };
	static const char * const lightAngleSpotCoefNames[4] = { "u_lightangle_spotCoef0", "u_lightangle_spotCoef1", "u_lightangle_spotCoef2", "u_lightangle_spotCoef3", };
	static const char * const lightAmbientNames[4] = { "u_lightambient0", "u_lightambient1", "u_lightambient2", "u_lightambient3", };
	static const char * const lightDiffuseNames[4] = { "u_lightdiffuse0", "u_lightdiffuse1", "u_lightdiffuse2", "u_lightdiffuse3", };
	static const char * const lightSpecularNames[4] = { "u_lightspecular0", "u_lightspecular1", "u_lightspecular2", "u_lightspecular3", };
	for (int light = 0; light < 4; ++light) {
		query(&u_lightpos[light], lightPosNames[light]);
		query(&u_lightdir[light], lightDirNames[light]);
		query(&u_lightatt[light], lightAttNames[light]);
		query(&u_lightangle_spotCoef[light], lightAngleSpotCoefNames[light]);
		query(&u_lightambient[light], lightAmbientNames[light]);
		query(&u_lightdiffuse[light], lightDiffuseNames[light]);
		query(&u_lightspecular[light], lightSpecularNames[light]);
	}

	// We need to fetch these unconditionally, gstate_c.spline or bezier will not be set if we
	// create this shader at load time from the shader cache.
	query(&u_tess_points, "u_tess_points");
	query(&u_tess_weights_u, "u_tess_weights_u");
	query(&u_tess_weights_v, "u_tess_weights_v");
	query(&u_spline_counts, "u_spline_counts");
	query(&u_depal_mask_shift_off_fmt, "u_depal_mask_shift_off_fmt");
	query(&u_mipBias, "u_mipBias");

	attrMask = vs->GetAttrMask();
	availableUniforms = vs->GetUniformMask() | fs->GetUniformMask();

	// Sampler uniforms get their texture slot assigned once, at program creation.
	std::vector<GLRProgram::Initializer> samplerInits{
		{ &u_tex, 0, TEX_SLOT_PSP_TEXTURE },
		{ &u_fbotex, 0, TEX_SLOT_SHADERBLEND_SRC },
		{ &u_testtex, 0, TEX_SLOT_ALPHATEST },
		{ &u_pal, 0, TEX_SLOT_CLUT },  // CLUT
		{ &u_tess_points, 0, TEX_SLOT_SPLINE_POINTS },  // Control Points
		{ &u_tess_weights_u, 0, TEX_SLOT_SPLINE_WEIGHTS_U },
		{ &u_tess_weights_v, 0, TEX_SLOT_SPLINE_WEIGHTS_V },
	};

	GLRProgramFlags flags{};
	flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;

	const bool rangeCullViaClip = VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
	const bool depthClampClip = !VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP);
	if (depthClampClip) {
		flags.useClipDistance0 = true;
		flags.useClipDistance1 = true;
		if (rangeCullViaClip) {
			flags.useClipDistance2 = true;
		}
	} else if (rangeCullViaClip) {
		flags.useClipDistance0 = true;
	}

	program = render->CreateProgram(stages, semantics, queries, samplerInits, flags);

	// The rest, use the "dirty" mechanism.
	dirtyUniforms = DIRTY_ALL_UNIFORMS;
}
// Hands the linked program back to the render manager for deletion.
LinkedShader::~LinkedShader()
{
	render_->DeleteProgram(program);
}
// Utility
// Uploads a single float to the given uniform.
static inline void SetFloatUniform(GLRenderManager *render, GLint *uniform, float value) {
	const float single[1] = { value };
	render->SetUniformF(uniform, 1, single);
}
// Uploads two consecutive floats to the given uniform.
static inline void SetFloatUniform2(GLRenderManager *render, GLint *uniform, float value[2]) {
	const float *pair = &value[0];
	render->SetUniformF(uniform, 2, pair);
}
// Expands a packed 8-bit-per-channel color with Uint8x4ToFloat4 and uploads
// the first three components.
static inline void SetColorUniform3(GLRenderManager *render, GLint *uniform, u32 color) {
	float rgba[4];
	Uint8x4ToFloat4(rgba, color);
	// Only three of the four expanded components are sent.
	render->SetUniformF(uniform, 3, rgba);
}
// Expands a packed color plus a separately supplied 8-bit alpha with
// Uint8x3ToFloat4_AlphaUint8 and uploads all four components.
static void SetColorUniform3Alpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
	float rgba[4];
	Uint8x3ToFloat4_AlphaUint8(rgba, color, alpha);
	render->SetUniformF(uniform, 4, rgba);
}
// This passes colors unscaled (e.g. 0 - 255 not 0 - 1.)
// The exception is when gl_extensions.gpuVendor is GPU_VENDOR_IMGTEC: there the
// values are scaled by 1/255 before upload.
static void SetColorUniform3Alpha255(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
	const bool normalize = gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC;
	// Multiplying by exactly 1.0f leaves the 0-255 values untouched.
	const float scale = normalize ? (1.0f / 255.0f) : 1.0f;

	const float col[4] = {
		(float)(color & 0xFF) * scale,
		(float)((color >> 8) & 0xFF) * scale,
		(float)((color >> 16) & 0xFF) * scale,
		(float)alpha * scale,
	};
	render->SetUniformF(uniform, 4, col);
}
// Uploads a packed color and a separate alpha as four integers in the 0-255 range.
static void SetColorUniform3iAlpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
	int channels[4];
	channels[0] = (int)(color & 0xFF);
	channels[1] = (int)((color >> 8) & 0xFF);
	channels[2] = (int)((color >> 16) & 0xFF);
	channels[3] = (int)alpha;
	render->SetUniformI(uniform, 4, channels);
}
// Uploads a packed color normalized to 0-1 in the first three components, with
// an arbitrary float riding along in the fourth.
static void SetColorUniform3ExtraFloat(GLRenderManager *render, GLint *uniform, u32 color, float extra) {
	const u32 r = color & 0xFF;
	const u32 g = (color >> 8) & 0xFF;
	const u32 b = (color >> 16) & 0xFF;
	const float packed[4] = { r / 255.0f, g / 255.0f, b / 255.0f, extra };
	render->SetUniformF(uniform, 4, packed);
}
// Expands three packed values with ExpandFloat24x3ToFloat4 and uploads the
// first three resulting floats.
static void SetFloat24Uniform3(GLRenderManager *render, GLint *uniform, const uint32_t data[3]) {
	float expanded[4];
	ExpandFloat24x3ToFloat4(expanded, data);
	render->SetUniformF(uniform, 3, expanded);
}
// Uploads four consecutive floats to the given uniform.
static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data[4]) {
	const float *quad = &data[0];
	render->SetUniformF(uniform, 4, quad);
}
// Converts a 4x3 matrix with ConvertMatrix4x3To4x4Transposed and uploads the
// resulting 4x4 matrix.
static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m4x3) {
	float expanded[16];
	ConvertMatrix4x3To4x4Transposed(expanded, m4x3);
	render->SetUniformM4x4(uniform, expanded);
}
// Applies the viewport offset and scale from gstate_c to a projection matrix, in place.
static inline void ScaleProjMatrix(Matrix4x4 &in, bool useBufferedRendering) {
	// GL upside down is a pain as usual: the Y offset is negated when not rendering buffered.
	const float yOffset = useBufferedRendering ? gstate_c.vpYOffset : -gstate_c.vpYOffset;

	const Vec3 translation(gstate_c.vpXOffset, yOffset, gstate_c.vpZOffset);
	const Vec3 scaling(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale);
	in.translateAndScale(translation, scaling);
}
// Negates the X and/or Y related entries of a projection matrix, in place,
// depending on the signs of the viewport width/height, and optionally applies
// the depth range hack.
static inline void FlipProjMatrix(Matrix4x4 &in, bool useBufferedRendering) {
	// Which sign of vpHeight counts as "inverted" depends on buffered rendering.
	bool invertedY;
	if (useBufferedRendering) {
		invertedY = gstate_c.vpHeight < 0;
	} else {
		invertedY = gstate_c.vpHeight > 0;
	}
	if (invertedY) {
		// Entries 1, 5, 9, 13.
		for (int idx = 1; idx < 16; idx += 4) {
			in[idx] = -in[idx];
		}
	}

	if (gstate_c.vpWidth < 0) {
		// Entries 0, 4, 8, 12.
		for (int idx = 0; idx < 16; idx += 4) {
			in[idx] = -in[idx];
		}
	}

	// In Phantasy Star Portable 2, depth range sometimes goes negative and is clamped by glDepthRange to 0,
	// causing graphics clipping glitch (issue #1788). This hack modifies the projection matrix to work around it.
	if (!gstate_c.Supports(GPU_USE_DEPTH_RANGE_HACK)) {
		return;
	}

	const float zScale = gstate.getViewportZScale() / 65535.0f;
	const float zCenter = gstate.getViewportZCenter() / 65535.0f;

	// Only relevant when the far depth range is below zero...
	if (!(zCenter + zScale < 0.0f)) {
		return;
	}
	// ...and the projection is a perspective one.
	if (!(in[11] < 0.0f)) {
		return;
	}

	const float depthMax = gstate.getDepthRangeMax() / 65535.0f;
	const float depthMin = gstate.getDepthRangeMin() / 65535.0f;

	float a = in[10];
	float b = in[14];

	const float n = b / (a - 1.0f);
	float f = b / (a + 1.0f);

	f = (n * f) / (n + ((zCenter + zScale) * (n - f) / (depthMax - depthMin)));

	a = (n + f) / (n - f);
	b = (2.0f * n * f) / (n - f);

	// Leave the matrix alone if the math produced NaNs.
	if (!my_isnan(a) && !my_isnan(b)) {
		in[10] = a;
		in[14] = b;
	}
}
// Binds this program on the render manager. The VSID argument is not used here.
void LinkedShader::use(const ShaderID &VSID)
{
	// Note that we no longer track attr masks here - we do it for the input layouts instead.
	render_->BindProgram(program);
}
// Uploads every uniform that is both marked dirty on this program and listed in
// availableUniforms, reading the values from gstate / gstate_c. The pending
// dirty set (dirtyUniforms) is cleared up front.
//
// vertType and vsid are not read in this function body.
// useBufferedRendering influences the projection matrices, the rotation
// uniform and the cull range.
void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering) {
// Only consider uniforms this program actually has.
u64 dirty = dirtyUniforms & availableUniforms;
dirtyUniforms = 0;
if (IsVRBuild()) {
// In VR builds the view matrix is treated as dirty on every call.
dirty |= DIRTY_VIEWMATRIX;
SetVRCompat(VR_COMPAT_FOG_COLOR, gstate.fogcolor);
}
// Nothing to upload.
if (!dirty)
return;
if (dirty & DIRTY_DEPAL) {
int indexMask = gstate.getClutIndexMask();
int indexShift = gstate.getClutIndexShift();
int indexOffset = gstate.getClutIndexStartPos() >> 4;
int format = gstate_c.depalFramebufferFormat;
uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
// Poke in a bilinear filter flag in the top bit.
val |= gstate.isMagnifyFilteringEnabled() << 31;
render_->SetUniformUI1(&u_depal_mask_shift_off_fmt, val);
}
// These two are only assigned and only read inside IsVRBuild() branches.
bool is2D, flatScreen;
if (IsVRBuild()) {
// Analyze scene
is2D = Is2DVRObject(gstate.projMatrix, gstate.isModeThrough());
flatScreen = IsFlatVRScene();
// Set HUD mode
bool is3D = gstate.isDepthWriteEnabled();
bool hud = is2D && !is3D && !flatScreen &&
gstate.isModeThrough() && //2D content requires orthographic projection
gstate.isAlphaBlendEnabled() && //2D content has to be blended
!gstate.isLightingEnabled() && //2D content cannot be rendered with lights on
!gstate.isFogEnabled(); //2D content cannot be rendered with fog on
if (hud) {
float scale = 0.5f;
render_->SetUniformF1(&u_scaleX, scale);
render_->SetUniformF1(&u_scaleY, scale / 480.0f * 272.0f);
} else {
render_->SetUniformF1(&u_scaleX, 1.0f);
render_->SetUniformF1(&u_scaleY, 1.0f);
}
}
// Update any dirty uniforms before we draw
if (dirty & DIRTY_PROJMATRIX) {
if (IsVRBuild()) {
// Per-eye projection matrices; flat/2D content uses the game's matrix for both eyes.
Matrix4x4 leftEyeMatrix, rightEyeMatrix;
if (flatScreen || is2D) {
memcpy(&leftEyeMatrix, gstate.projMatrix, 16 * sizeof(float));
memcpy(&rightEyeMatrix, gstate.projMatrix, 16 * sizeof(float));
} else {
UpdateVRProjection(gstate.projMatrix, leftEyeMatrix.m, rightEyeMatrix.m);
}
FlipProjMatrix(leftEyeMatrix, useBufferedRendering);
FlipProjMatrix(rightEyeMatrix, useBufferedRendering);
ScaleProjMatrix(leftEyeMatrix, useBufferedRendering);
ScaleProjMatrix(rightEyeMatrix, useBufferedRendering);
render_->SetUniformM4x4Stereo("u_proj_lens", &u_proj_lens, leftEyeMatrix.m, rightEyeMatrix.m);
}
// The regular projection matrix is uploaded in both VR and non-VR builds.
Matrix4x4 flippedMatrix;
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
FlipProjMatrix(flippedMatrix, useBufferedRendering);
ScaleProjMatrix(flippedMatrix, useBufferedRendering);
render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
render_->SetUniformF1(&u_rotation, useBufferedRendering ? 0 : (float)g_display_rotation);
}
if (dirty & DIRTY_PROJTHROUGHMATRIX)
{
// Ortho matrix over the current render target size; top/bottom are swapped when not buffered.
Matrix4x4 proj_through;
if (useBufferedRendering) {
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
} else {
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0.0f, 0.0f, 1.0f);
}
render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr());
}
if (dirty & DIRTY_TEXENV) {
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
}
if (dirty & DIRTY_ALPHACOLORREF) {
// The alpha reference is pre-masked with the alpha test mask.
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
}
if (dirty & DIRTY_ALPHACOLORMASK) {
SetColorUniform3iAlpha(render_, &u_alphacolormask, gstate.colortestmask, gstate.getAlphaTestMask());
}
if (dirty & DIRTY_COLORWRITEMASK) {
// Combined value (pmska in the top byte, low 24 bits of pmskc below) is inverted before upload.
render_->SetUniformUI1(&u_colorWriteMask, ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF)));
}
if (dirty & DIRTY_FOGCOLOR) {
SetColorUniform3(render_, &u_fogcolor, gstate.fogcolor);
}
if (dirty & DIRTY_FOGCOEF) {
float fogcoef[2] = {
getFloat24(gstate.fog1),
getFloat24(gstate.fog2),
};
// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
if (my_isnanorinf(fogcoef[0])) {
// Not really sure what a sensible value might be, but let's try 64k.
fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;
}
if (my_isnanorinf(fogcoef[1])) {
fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;
}
render_->SetUniformF(&u_fogcoef, 2, fogcoef);
}
if (dirty & DIRTY_UVSCALEOFFSET) {
// Ratio between the texture size from gstate and the current texture size in gstate_c.
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
const int w = gstate.getTextureWidth(0);
const int h = gstate.getTextureHeight(0);
const float widthFactor = (float)w * invW;
const float heightFactor = (float)h * invH;
float uvscaleoff[4];
if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) {
// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
uvscaleoff[0] = gstate_c.uv.uScale * widthFactor;
uvscaleoff[1] = gstate_c.uv.vScale * heightFactor;
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
} else {
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;
uvscaleoff[2] = 0.0f;
uvscaleoff[3] = 0.0f;
}
render_->SetUniformF(&u_uvscaleoffset, 4, uvscaleoff);
}
// Skipped entirely when the program has no u_texclamp uniform.
if ((dirty & DIRTY_TEXCLAMP) && u_texclamp != -1) {
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
const int w = gstate.getTextureWidth(0);
const int h = gstate.getTextureHeight(0);
const float widthFactor = (float)w * invW;
const float heightFactor = (float)h * invH;
// First wrap xy, then half texel xy (for clamp.)
const float texclamp[4] = {
widthFactor,
heightFactor,
invW * 0.5f,
invH * 0.5f,
};
const float texclampoff[2] = {
gstate_c.curTextureXOffset * invW,
gstate_c.curTextureYOffset * invH,
};
render_->SetUniformF(&u_texclamp, 4, texclamp);
if (u_texclampoff != -1) {
render_->SetUniformF(&u_texclampoff, 2, texclampoff);
}
}
if ((dirty & DIRTY_MIPBIAS) && u_mipBias != -1) {
// Level offset is in 1/16 steps; the result is divided by the number of mip levels.
float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f);
mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1);
render_->SetUniformF(&u_mipBias, 1, &mipBias);
}
// Transform
if (dirty & DIRTY_WORLDMATRIX) {
SetMatrix4x3(render_, &u_world, gstate.worldMatrix);
}
if (dirty & DIRTY_VIEWMATRIX) {
if (IsVRBuild()) {
// Both eyes start from the game's view matrix; UpdateVRView is skipped for flat/2D content.
float leftEyeView[16];
float rightEyeView[16];
ConvertMatrix4x3To4x4Transposed(leftEyeView, gstate.viewMatrix);
ConvertMatrix4x3To4x4Transposed(rightEyeView, gstate.viewMatrix);
if (!flatScreen && !is2D) {
UpdateVRView(leftEyeView, rightEyeView);
}
render_->SetUniformM4x4Stereo("u_view", &u_view, leftEyeView, rightEyeView);
} else {
SetMatrix4x3(render_, &u_view, gstate.viewMatrix);
}
}
if (dirty & DIRTY_TEXMATRIX) {
SetMatrix4x3(render_, &u_texmtx, gstate.tgenMatrix);
}
if (dirty & DIRTY_DEPTHRANGE) {
// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();
// These are just the reverse of the formulas in GPUStateUtils.
float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
float viewZScale = halfActualZRange;
float viewZCenter = minz + halfActualZRange;
// Without accurate depth support, the raw viewport values are used instead.
if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
viewZScale = vpZScale;
viewZCenter = vpZCenter;
}
float data[4] = { viewZScale, viewZCenter, gstate_c.vpZOffset, 1.0f / gstate_c.vpDepthScale };
SetFloatUniform4(render_, &u_depthRange, data);
}
if (dirty & DIRTY_CULLRANGE) {
float minValues[4], maxValues[4];
CalcCullRange(minValues, maxValues, !useBufferedRendering, true);
SetFloatUniform4(render_, &u_cullRangeMin, minValues);
SetFloatUniform4(render_, &u_cullRangeMax, maxValues);
}
if (dirty & DIRTY_STENCILREPLACEVALUE) {
// Stencil reference scaled from 0-255 down to 0-1.
float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f);
render_->SetUniformF(&u_stencilReplaceValue, 1, &f);
}
// Bone matrices: each bone has its own dirty bit, starting at DIRTY_BONEMATRIX0.
float bonetemp[16];
for (int i = 0; i < numBones; i++) {
if (dirty & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i);
render_->SetUniformM4x4(&u_bone[i], bonetemp);
}
}
if (dirty & DIRTY_SHADERBLEND) {
// Each of these is only uploaded when its location is not -1.
if (u_blendFixA != -1) {
SetColorUniform3(render_, &u_blendFixA, gstate.getFixA());
}
if (u_blendFixB != -1) {
SetColorUniform3(render_, &u_blendFixB, gstate.getFixB());
}
const float fbotexSize[2] = {
1.0f / (float)gstate_c.curRTRenderWidth,
1.0f / (float)gstate_c.curRTRenderHeight,
};
if (u_fbotexSize != -1) {
render_->SetUniformF(&u_fbotexSize, 2, fbotexSize);
}
}
// Lighting
if (dirty & DIRTY_LIGHT_CONTROL) {
render_->SetUniformUI1(&u_lightControl, PackLightControlBits());
}
if (dirty & DIRTY_AMBIENT) {
SetColorUniform3Alpha(render_, &u_ambient, gstate.ambientcolor, gstate.getAmbientA());
}
if (dirty & DIRTY_MATAMBIENTALPHA) {
SetColorUniform3Alpha(render_, &u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA());
}
if (dirty & DIRTY_MATDIFFUSE) {
SetColorUniform3(render_, &u_matdiffuse, gstate.materialdiffuse);
}
if (dirty & DIRTY_MATEMISSIVE) {
SetColorUniform3(render_, &u_matemissive, gstate.materialemissive);
}
if (dirty & DIRTY_MATSPECULAR) {
// The specular coefficient rides along as the fourth component.
SetColorUniform3ExtraFloat(render_, &u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
}
// Per-light uniforms: each light has its own dirty bit, starting at DIRTY_LIGHT0.
for (int i = 0; i < 4; i++) {
if (dirty & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = getFloat24(gstate.lpos[i * 3 + 0]);
float y = getFloat24(gstate.lpos[i * 3 + 1]);
float z = getFloat24(gstate.lpos[i * 3 + 2]);
float len = sqrtf(x*x + y*y + z*z);
// From here on len holds the reciprocal length (or 1 for a zero vector, avoiding a division by zero).
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
render_->SetUniformF(&u_lightpos[i], 3, vec);
} else {
SetFloat24Uniform3(render_, &u_lightpos[i], &gstate.lpos[i * 3]);
}
if (u_lightdir[i] != -1) SetFloat24Uniform3(render_, &u_lightdir[i], &gstate.ldir[i * 3]);
if (u_lightatt[i] != -1) SetFloat24Uniform3(render_, &u_lightatt[i], &gstate.latt[i * 3]);
if (u_lightangle_spotCoef[i] != -1) {
float lightangle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
SetFloatUniform2(render_, &u_lightangle_spotCoef[i], lightangle_spotCoef);
}
// lcolor holds three entries per light: ambient, diffuse, specular.
if (u_lightambient[i] != -1) SetColorUniform3(render_, &u_lightambient[i], gstate.lcolor[i * 3]);
if (u_lightdiffuse[i] != -1) SetColorUniform3(render_, &u_lightdiffuse[i], gstate.lcolor[i * 3 + 1]);
if (u_lightspecular[i] != -1) SetColorUniform3(render_, &u_lightspecular[i], gstate.lcolor[i * 3 + 2]);
}
}
if (dirty & DIRTY_BEZIERSPLINE) {
if (u_spline_counts != -1) {
render_->SetUniformI1(&u_spline_counts, gstate_c.spline_num_points_u);
}
}
}
// Sets up the shader manager: fetches the GL render manager from the draw
// context, allocates the shader source scratch buffer and marks the
// last-used shader IDs as invalid.
ShaderManagerGLES::ShaderManagerGLES(Draw::DrawContext *draw)
	: ShaderManagerCommon(draw),
	  fsCache_(16),
	  vsCache_(16) {
	auto *renderManager = (GLRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
	render_ = renderManager;

	// Scratch buffer the shader generators write their source into.
	codeBuffer_ = new char[16384];

	lastFSID_.set_invalid();
	lastVSID_.set_invalid();
}
// Frees the shader source scratch buffer allocated in the constructor.
ShaderManagerGLES::~ShaderManagerGLES()
{
	delete[] codeBuffer_;
}
// Deletes every linked program and every cached vertex/fragment shader,
// empties the three caches and marks all shader state dirty.
void ShaderManagerGLES::Clear() {
	DirtyLastShader();

	// Delete the owned objects first...
	for (const auto &entry : linkedShaderCache_) {
		delete entry.ls;
	}
	fsCache_.Iterate([](const FShaderID &, Shader *fragShader) {
		delete fragShader;
	});
	vsCache_.Iterate([](const VShaderID &, Shader *vertShader) {
		delete vertShader;
	});

	// ...then drop the now-dangling entries.
	linkedShaderCache_.clear();
	fsCache_.Clear();
	vsCache_.Clear();

	DirtyShader();
}
// Currently just forwards to Clear(); the deleteThem argument is not consulted.
void ShaderManagerGLES::ClearCache(bool deleteThem)
{
	// TODO: Recreate all from the diskcache when we come back.
	Clear();
}
// Drops all shaders and forgets the draw context and render manager pointers.
void ShaderManagerGLES::DeviceLost()
{
	// Clear() still needs render_, so it has to run before the pointers are reset.
	Clear();
	draw_ = nullptr;
	render_ = nullptr;
}
// Re-attaches to a (new) draw context and fetches its GL render manager.
void ShaderManagerGLES::DeviceRestore(Draw::DrawContext *draw)
{
	draw_ = draw;
	render_ = (GLRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
}
// Invalidates everything remembered about the last applied shaders and marks
// all uniforms plus the vertex/fragment shader state dirty.
void ShaderManagerGLES::DirtyShader()
{
	// Forget the last shader ID
	lastVSID_.set_invalid();
	lastFSID_.set_invalid();
	DirtyLastShader();

	const auto everything = DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE;
	gstate_c.Dirty(everything);

	shaderSwitchDirtyUniforms_ = 0;
}
// Forgets the last linked shader so the next apply cannot take the "same as last time" shortcut.
void ShaderManagerGLES::DirtyLastShader()
{
	lastVShaderSame_ = false;
	lastShader_ = nullptr;
}
// Generates fragment shader source for FSID into codeBuffer_ and wraps it in a
// new Shader. Returns nullptr (after logging) if source generation fails.
Shader *ShaderManagerGLES::CompileFragmentShader(FShaderID FSID) {
	uint64_t uniformMask;
	std::string errorString;
	const bool generated = GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, nullptr, &errorString);
	if (!generated) {
		ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str());
		return nullptr;
	}

	const std::string description = FragmentShaderDesc(FSID);
	// Fragment shaders have no attribute mask.
	ShaderDescGLES params{ GL_FRAGMENT_SHADER, 0, uniformMask };
	Shader *fragShader = new Shader(render_, codeBuffer_, description, params);
	return fragShader;
}
// Generates vertex shader source for VSID into codeBuffer_ and wraps it in a
// new Shader. Returns nullptr (after logging) if source generation fails.
Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) {
	uint32_t attrMask;
	uint64_t uniformMask;
	std::string errorString;
	const bool generated = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &errorString);
	if (!generated) {
		ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str());
		return nullptr;
	}

	const std::string description = VertexShaderDesc(VSID);
	ShaderDescGLES params{ GL_VERTEX_SHADER, attrMask, uniformMask };
	// The hardware transform flag comes straight from the shader ID.
	params.useHWTransform = VSID.Bit(VS_BIT_USE_HW_TRANSFORM);
	Shader *vertShader = new Shader(render_, codeBuffer_, description, params);
	return vertShader;
}
// Resolves the vertex shader for the current state.
// Writes the (possibly recomputed) shader ID to *VSID and returns the matching
// Shader, compiling and caching it on a miss. If compilation fails, a software
// transform shader is compiled instead and stored under the original ID.
Shader *ShaderManagerGLES::ApplyVertexShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat, VShaderID *VSID) {
	// Only recompute the ID if vertex shader state was touched since last time.
	if (!gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
		*VSID = lastVSID_;
	} else {
		gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
		ComputeVertexShaderID(VSID, vertType, useHWTransform, useHWTessellation, weightsAsFloat);
	}

	lastVShaderSame_ = (lastShader_ != nullptr && *VSID == lastVSID_);
	if (lastVShaderSame_) {
		// Already all set.
		return lastShader_->vs_;
	}

	lastVSID_ = *VSID;

	Shader *cached = vsCache_.Get(*VSID);
	if (cached) {
		return cached;
	}

	// Vertex shader not in cache. Let's compile it.
	Shader *compiled = CompileVertexShader(*VSID);
	if (!compiled || compiled->Failed()) {
		auto gr = GetI18NCategory("Graphics");
		ERROR_LOG(G3D, "Vertex shader generation failed, falling back to software transform");
		if (!g_Config.bHideSlowWarnings) {
			host->NotifyUserMessage(gr->T("hardware transform error - falling back to software"), 2.5f, 0xFF3030FF);
		}
		delete compiled;

		// TODO: Look for existing shader with the appropriate ID, use that instead of generating a new one - however, need to make sure
		// that that shader ID is not used when computing the linked shader ID below, because then IDs won't match
		// next time and we'll do this over and over...

		// Can still work with software transform.
		VShaderID fallbackID;
		ComputeVertexShaderID(&fallbackID, vertType, false, false, weightsAsFloat);
		compiled = CompileVertexShader(fallbackID);
	}

	// Stored under the requested ID even when the fallback shader was used.
	vsCache_.Insert(*VSID, compiled);
	diskCacheDirty_ = true;
	return compiled;
}
// Resolves the fragment shader for the current state, finds or creates the
// linked program for (vs, fs), binds it, uploads its dirty uniforms and
// returns it. Pending dirty-uniform bits from gstate_c are propagated to the
// previously used program and, on a program switch, to every cached program.
LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, const ComputedPipelineState &pipelineState, u32 vertType, bool useBufferedRendering) {
	const uint64_t pendingDirty = gstate_c.GetDirtyUniforms();
	if (pendingDirty != 0) {
		if (lastShader_ != nullptr) {
			lastShader_->dirtyUniforms |= pendingDirty;
		}
		shaderSwitchDirtyUniforms_ |= pendingDirty;
		gstate_c.CleanUniforms();
	}

	// Only recompute the ID if fragment shader state was touched since last time.
	FShaderID FSID;
	if (!gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
		FSID = lastFSID_;
	} else {
		gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
		ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
	}

	// Same vertex and fragment shader as last time: just refresh uniforms.
	if (lastVShaderSame_ && FSID == lastFSID_) {
		lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering);
		return lastShader_;
	}

	lastFSID_ = FSID;

	Shader *fs = fsCache_.Get(FSID);
	if (!fs) {
		// Fragment shader not in cache. Let's compile it.
		// Can't really tell if we succeeded since the compile is on the GPU thread later.
		// Could fail to generate, in which case we're kinda screwed.
		fs = CompileFragmentShader(FSID);
		fsCache_.Insert(FSID, fs);
		diskCacheDirty_ = true;
	}

	// Okay, we have both shaders. Let's see if there's a linked one.
	LinkedShader *linked = nullptr;
	const u64 switchDirty = shaderSwitchDirtyUniforms_;
	// Deliberately visits every entry (no early exit): each one receives the deferred dirty bits.
	for (auto &entry : linkedShaderCache_) {
		// Deferred dirtying! Let's see if we can make this even more clever later.
		entry.ls->dirtyUniforms |= switchDirty;
		if (entry.vs == vs && entry.fs == fs) {
			linked = entry.ls;
		}
	}
	shaderSwitchDirtyUniforms_ = 0;

	if (linked == nullptr) {
		_dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
		_dbg_assert_(FSID.Bit(FS_BIT_DO_TEXTURE) == VSID.Bit(VS_BIT_DO_TEXTURE));
		_dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE));

		// Check if we can link these.
		linked = new LinkedShader(render_, VSID, vs, FSID, fs, vs->UseHWTransform());
		linkedShaderCache_.push_back(LinkedShaderCacheEntry(vs, fs, linked));
	}

	linked->use(VSID);
	linked->UpdateUniforms(vertType, VSID, useBufferedRendering);

	lastShader_ = linked;
	return linked;
}
// Returns a debug string for this shader: the stored source code, a short
// description derived from the given ID, or "N/A" for any other string type.
std::string Shader::GetShaderString(DebugShaderStringType type, ShaderID id) const {
	if (type == SHADER_STRING_SOURCE_CODE) {
		return source_;
	}
	if (type == SHADER_STRING_SHORT_DESC) {
		// The ID is reinterpreted according to the stage this shader was built for.
		if (isFragment_) {
			return FragmentShaderDesc(FShaderID(id));
		}
		return VertexShaderDesc(VShaderID(id));
	}
	return "N/A";
}
// Returns the string form of every cached shader ID of the requested type.
// Types other than vertex/fragment yield an empty list.
std::vector<std::string> ShaderManagerGLES::DebugGetShaderIDs(DebugShaderType type) {
	std::vector<std::string> ids;
	switch (type) {
	case SHADER_TYPE_VERTEX:
		vsCache_.Iterate([&ids](const VShaderID &key, Shader *) {
			std::string idstr;
			key.ToString(&idstr);
			ids.push_back(idstr);
		});
		break;
	case SHADER_TYPE_FRAGMENT:
		fsCache_.Iterate([&ids](const FShaderID &key, Shader *) {
			std::string idstr;
			key.ToString(&idstr);
			ids.push_back(idstr);
		});
		break;
	default:
		break;
	}
	return ids;
}
// Looks up a cached shader by its string ID and returns the requested debug
// string for it. Returns an empty string when no shader with that ID is in the
// cache, and "N/A" for shader types other than vertex/fragment.
std::string ShaderManagerGLES::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
	ShaderID shaderId;
	shaderId.FromString(id);
	switch (type) {
	case SHADER_TYPE_VERTEX:
	{
		Shader *vs = vsCache_.Get(VShaderID(shaderId));
		return vs ? vs->GetShaderString(stringType, shaderId) : "";
	}
	case SHADER_TYPE_FRAGMENT:
	{
		Shader *fs = fsCache_.Get(FShaderID(shaderId));
		// The ID comes from the caller (debugger UI), so it may not be cached;
		// treat a miss the same way the vertex branch does.
		return fs ? fs->GetShaderString(stringType, shaderId) : "";
	}
	default:
		return "N/A";
	}
}
// Shader pseudo-cache.
//
// We simply store the IDs of the shaders used during gameplay. On next startup of
// the same game, we simply compile all the shaders from the start, so we don't have to
// compile them on the fly later. Ideally we would store the actual compiled shaders
// rather than just their IDs, but OpenGL does not support this, except for a few obscure
// vendor-specific extensions.
//
// If things like GPU supported features have changed since the last time, we discard the cache
// as sometimes these features might have an effect on the ID bits.
// Magic number stored in CacheHeader::magic; Load() rejects files whose magic differs.
#define CACHE_HEADER_MAGIC 0x83277592
// Cache format version stored in CacheHeader::version; Load() rejects files whose version differs.
#define CACHE_VERSION 15
// Header at the start of the shader ID cache file. Save() writes it raw with fwrite and
// Load() reads it back raw, so the field order and sizes define the on-disk layout.
// After the header, the file holds numVertexShaders vertex shader IDs, then
// numFragmentShaders fragment shader IDs, then numLinkedPrograms (vertex ID, fragment ID) pairs.
struct CacheHeader {
// Must equal CACHE_HEADER_MAGIC.
uint32_t magic;
// Must equal CACHE_VERSION.
uint32_t version;
// Snapshot of gstate_c.featureFlags at save time; Load() discards the cache on mismatch.
uint32_t featureFlags;
// Written as 0 by Save(); not inspected by Load().
uint32_t reserved;
// Number of vertex shader IDs following the header.
int numVertexShaders;
// Number of fragment shader IDs following the vertex shader IDs.
int numFragmentShaders;
// Number of (vertex ID, fragment ID) pairs following the fragment shader IDs.
int numLinkedPrograms;
};
void ShaderManagerGLES::Load(const Path &filename) {
File::IOFile f(filename, "rb");
u64 sz = f.GetSize();
if (!f.IsOpen()) {
return;
}
CacheHeader header;
if (!f.ReadArray(&header, 1)) {
return;
}
if (header.magic != CACHE_HEADER_MAGIC || header.version != CACHE_VERSION || header.featureFlags != gstate_c.featureFlags) {
return;
}
diskCachePending_.start = time_now_d();
diskCachePending_.Clear();
// Sanity check the file contents
if (header.numFragmentShaders > 1000 || header.numVertexShaders > 1000 || header.numLinkedPrograms > 1000) {
ERROR_LOG(G3D, "Corrupt shader cache file header, aborting.");
return;
}
// Also make sure the size makes sense, in case there's corruption.
u64 expectedSize = sizeof(header);
expectedSize += header.numVertexShaders * sizeof(VShaderID);
expectedSize += header.numFragmentShaders * sizeof(FShaderID);
expectedSize += header.numLinkedPrograms * (sizeof(VShaderID) + sizeof(FShaderID));
if (sz != expectedSize) {
ERROR_LOG(G3D, "Shader cache file is wrong size: %lld instead of %lld", sz, expectedSize);
return;
}
diskCachePending_.vert.resize(header.numVertexShaders);
if (!f.ReadArray(&diskCachePending_.vert[0], header.numVertexShaders)) {
diskCachePending_.vert.clear();
return;
}
diskCachePending_.frag.resize(header.numFragmentShaders);
if (!f.ReadArray(&diskCachePending_.frag[0], header.numFragmentShaders)) {
diskCachePending_.vert.clear();
diskCachePending_.frag.clear();
return;
}
for (int i = 0; i < header.numLinkedPrograms; i++) {
VShaderID vsid;
FShaderID fsid;
if (!f.ReadArray(&vsid, 1)) {
return;
}
if (!f.ReadArray(&fsid, 1)) {
return;
}
diskCachePending_.link.push_back(std::make_pair(vsid, fsid));
}
// Actual compilation happens in ContinuePrecompile(), called by GPU_GLES's IsReady.
NOTICE_LOG(G3D, "Precompiling the shader cache from '%s'", filename.c_str());
diskCacheDirty_ = false;
}
// Compiles and links the shaders queued by Load(), doing as much work as fits in a time slice.
//
// sliceTime: time budget for this call, in the same unit as time_now_d()
//            (presumably seconds - confirm against time_now_d()).
//
// Returns true once the pending queue is fully processed (or was already empty).
// Returns false if the budget ran out, or if the cache turned out to be unusable; in the
// latter case the queue is cleared first. Progress is kept in pending.vertPos/fragPos/linkPos,
// which the loops advance by reference, so a later call resumes where this one stopped.
bool ShaderManagerGLES::ContinuePrecompile(float sliceTime) {
	auto &pending = diskCachePending_;
	if (pending.Done()) {
		return true;
	}

	PSP_SetLoading("Compiling shaders...");

	double start = time_now_d();
	// Let's try to keep it under sliceTime if possible.
	double end = start + sliceTime;

	for (size_t &i = pending.vertPos; i < pending.vert.size(); i++) {
		if (time_now_d() >= end) {
			// We'll finish later.
			return false;
		}

		const VShaderID &id = pending.vert[i];
		if (!vsCache_.Get(id)) {
			if (id.Bit(VS_BIT_IS_THROUGH) && id.Bit(VS_BIT_USE_HW_TRANSFORM)) {
				// Clearly corrupt, bailing.
				ERROR_LOG_REPORT(G3D, "Corrupt shader cache: Both IS_THROUGH and USE_HW_TRANSFORM set.");
				pending.Clear();
				return false;
			}

			Shader *vs = CompileVertexShader(id);
			// Treat a null result the same as a failed compile rather than dereferencing it.
			if (!vs || vs->Failed()) {
				// Give up on using the cache, just bail. We can't safely create the fallback shaders here
				// without trying to deduce the vertType from the VSID.
				ERROR_LOG(G3D, "Failed to compile a vertex shader loading from cache. Skipping rest of shader cache.");
				delete vs;
				pending.Clear();
				return false;
			}
			vsCache_.Insert(id, vs);
		} else {
			WARN_LOG(G3D, "Duplicate vertex shader found in GL shader cache, ignoring");
		}
	}

	for (size_t &i = pending.fragPos; i < pending.frag.size(); i++) {
		if (time_now_d() >= end) {
			// We'll finish later.
			return false;
		}

		const FShaderID &id = pending.frag[i];
		if (!fsCache_.Get(id)) {
			fsCache_.Insert(id, CompileFragmentShader(id));
		} else {
			WARN_LOG(G3D, "Duplicate fragment shader found in GL shader cache, ignoring");
		}
	}

	for (size_t &i = pending.linkPos; i < pending.link.size(); i++) {
		if (time_now_d() >= end) {
			// We'll finish later.
			return false;
		}

		const VShaderID &vsid = pending.link[i].first;
		const FShaderID &fsid = pending.link[i].second;
		Shader *vs = vsCache_.Get(vsid);
		Shader *fs = fsCache_.Get(fsid);
		// Only link pairs whose two halves are both present in the caches.
		if (vs && fs) {
			LinkedShader *ls = new LinkedShader(render_, vsid, vs, fsid, fs, vs->UseHWTransform(), true);
			LinkedShaderCacheEntry entry(vs, fs, ls);
			linkedShaderCache_.push_back(entry);
		}
	}

	// Okay, finally done. Time to report status.
	double finish = time_now_d();

	NOTICE_LOG(G3D, "Precompile: Compiled and linked %d programs (%d vertex, %d fragment) in %0.1f milliseconds", (int)pending.link.size(), (int)pending.vert.size(), (int)pending.frag.size(), 1000 * (finish - pending.start));
	pending.Clear();

	return true;
}
// Abandons any precompilation still queued: clears diskCachePending_, the queue that
// Load() fills and ContinuePrecompile() works through. Shaders that were already
// compiled and inserted into the caches are not touched here.
void ShaderManagerGLES::CancelPrecompile() {
diskCachePending_.Clear();
}
// Writes the IDs of all currently cached shaders and linked programs to `filename`,
// in the format Load() expects: a CacheHeader, then every vertex shader ID, every
// fragment shader ID, and one (vertex ID, fragment ID) pair per linked program.
//
// Does nothing if the cache is not marked dirty or if no programs have been linked.
// If the file cannot be opened, the dirty flag is still cleared so the save is not
// retried until something changes again.
void ShaderManagerGLES::Save(const Path &filename) {
	if (!diskCacheDirty_) {
		return;
	}
	if (linkedShaderCache_.empty()) {
		return;
	}
	INFO_LOG(G3D, "Saving the shader cache to '%s'", filename.c_str());
	FILE *f = File::OpenCFile(filename, "wb");
	if (!f) {
		// Can't save, give up for now.
		diskCacheDirty_ = false;
		return;
	}

	CacheHeader header;
	header.magic = CACHE_HEADER_MAGIC;
	header.version = CACHE_VERSION;
	header.reserved = 0;
	header.featureFlags = gstate_c.featureFlags;
	header.numVertexShaders = GetNumVertexShaders();
	header.numFragmentShaders = GetNumFragmentShaders();
	header.numLinkedPrograms = GetNumPrograms();
	fwrite(&header, 1, sizeof(header), f);

	vsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
		fwrite(&id, 1, sizeof(id), f);
	});
	fsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
		fwrite(&id, 1, sizeof(id), f);
	});

	// Linked programs only hold shader pointers, so recover each ID by searching the caches.
	// Iterate by const reference: the entries only need to be read, not copied.
	for (const auto &entry : linkedShaderCache_) {
		ShaderID vsid, fsid;
		vsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
			if (entry.vs == shader)
				vsid = id;
		});
		fsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
			if (entry.fs == shader)
				fsid = id;
		});
		fwrite(&vsid, 1, sizeof(vsid), f);
		fwrite(&fsid, 1, sizeof(fsid), f);
	}

	fclose(f);
	diskCacheDirty_ = false;
}