Merge pull request #15820 from hrydgard/depal-shadergen-fix

Switch the depal shader generator to use the ShaderWriter, fix some issues
This commit is contained in:
Henrik Rydgård 2022-08-09 16:34:25 +02:00 committed by GitHub
commit 6c46095c3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 259 additions and 228 deletions

View File

@ -33,6 +33,7 @@ ShaderLanguageDesc::ShaderLanguageDesc(ShaderLanguage lang) {
void ShaderLanguageDesc::Init(ShaderLanguage lang) {
shaderLanguage = lang;
strcpy(driverInfo, "");
switch (lang) {
case GLSL_1xx:
// Just used in the shader test, and as a basis for the others in DetectShaderLanguage.

View File

@ -125,7 +125,9 @@ void ShaderWriter::Preamble(const char **gl_extensions, size_t num_gl_extensions
F("%s\n", gl_extensions[i]);
}
// Print some system info - useful to gather information directly from screenshots.
F("// %s\n", lang_.driverInfo);
if (strlen(lang_.driverInfo) != 0) {
F("// Driver: %s\n", lang_.driverInfo);
}
switch (stage_) {
case ShaderStage::Fragment:
C("#define DISCARD discard\n");
@ -318,9 +320,27 @@ void ShaderWriter::HighPrecisionFloat() {
}
}
// Emits a default `lowp` precision statement for floats.
// Only written when the target is an OpenGL language with the gles flag set, or when it
// is Vulkan GLSL; every other target gets no output from this call.
void ShaderWriter::LowPrecisionFloat() {
	const auto language = lang_.shaderLanguage;
	const bool glesTarget = ShaderLanguageIsOpenGL(language) && lang_.gles;
	if (!glesTarget && language != GLSL_VULKAN) {
		return;
	}
	C("precision lowp float;\n");
}
// Emits a named float constant into the shader.
//   name:  identifier to declare.
//   value: value to bake in, formatted with %f.
// The two HLSL targets get a typed `static const float`; every other language gets a
// preprocessor #define instead.
void ShaderWriter::ConstFloat(const char *name, float value) {
	const auto language = lang_.shaderLanguage;
	const bool isHLSL = language == HLSL_D3D11 || language == HLSL_D3D9;
	if (isHLSL) {
		F("static const float %s = %f;\n", name, value);
	} else {
		F("#define %s %f\n", name, value);
	}
}
// Declares every entry of `samplers` to the shader. Each entry's position in the slice is
// used as its binding number, and its `name` is passed to both the texture and the
// sampler declaration helpers.
void ShaderWriter::DeclareSamplers(Slice<SamplerDef> samplers) {
for (int i = 0; i < (int)samplers.size(); i++) {
// NOTE(review): the next two DeclareTexture2D lines differ only in spacing. They look like
// the before/after pair of a diff rather than two intended calls - confirm only one stays.
DeclareTexture2D(samplers[i].name,i);
DeclareTexture2D(samplers[i].name, i);
DeclareSampler2D(samplers[i].name, i);
}
}
@ -347,24 +367,24 @@ void ShaderWriter::DeclareSampler2D(const char *name, int binding) {
// We only use separate samplers in HLSL D3D11, where we have no choice.
switch (lang_.shaderLanguage) {
case HLSL_D3D11:
F("SamplerState %s : register(s%d);\n", name, binding);
F("SamplerState %sSamp : register(s%d);\n", name, binding);
break;
default:
break;
}
}
ShaderWriter &ShaderWriter::SampleTexture2D(const char *texName, const char *samplerName, const char *uv) {
ShaderWriter &ShaderWriter::SampleTexture2D(const char *sampName, const char *uv) {
switch (lang_.shaderLanguage) {
case HLSL_D3D11:
F("%s.Sample(%s, %s)", texName, samplerName, uv);
F("%s.Sample(%sSamp, %s)", sampName, sampName, uv);
break;
case HLSL_D3D9:
F("tex2D(%s, %s)", texName, uv);
F("tex2D(%s, %s)", sampName, uv);
break;
default:
// Note: we ignore the sampler. Make sure you have bound samplers to the textures correctly.
F("%s(%s, %s)", lang_.texture, texName, uv);
F("%s(%s, %s)", lang_.texture, sampName, uv);
break;
}
return *this;

View File

@ -41,6 +41,7 @@ public:
// Constructs a writer that outputs into `buffer` for the given language and stage.
// The write pointer starts at the beginning of `buffer`, and the preamble is emitted
// right away, with the extension list passed straight through to Preamble().
ShaderWriter(char *buffer, const ShaderLanguageDesc &lang, ShaderStage stage, const char **gl_extensions, size_t num_gl_extensions) : p_(buffer), lang_(lang), stage_(stage) {
Preamble(gl_extensions, num_gl_extensions);
}
ShaderWriter(const ShaderWriter &) = delete;
// I tried to call all three write functions "W", but only MSVC
// managed to disentangle the ambiguities, so had to give up on that.
@ -64,17 +65,20 @@ public:
// F: Formats into the buffer.
ShaderWriter &F(const char *format, ...);
// Writes a single newline through C() and returns what C() returns, so the call can be
// used inside a chain of writer calls.
ShaderWriter &endl() {
return C("\n");
}
// Useful for fragment shaders in GLES.
// We always default integers to high precision.
void HighPrecisionFloat();
// Several of the shader languages ignore samplers, beware of that.
void DeclareSampler2D(const char *name, int binding);
void DeclareTexture2D(const char *name, int binding);
void LowPrecisionFloat();
void DeclareSamplers(Slice<SamplerDef> samplers);
ShaderWriter &SampleTexture2D(const char *texName, const char *samplerName, const char *uv);
void ConstFloat(const char *name, float value);
ShaderWriter &SampleTexture2D(const char *sampName, const char *uv);
// Simple shaders with no special tricks.
void BeginVSMain(Slice<InputDef> inputs, Slice<UniformDef> uniforms, Slice<VaryingDef> varyings);
@ -95,6 +99,10 @@ public:
}
private:
// Several of the shader languages ignore samplers, beware of that.
void DeclareSampler2D(const char *name, int binding);
void DeclareTexture2D(const char *name, int binding);
void Preamble(const char **gl_extensions, size_t num_gl_extensions);
char *p_;

View File

@ -1293,7 +1293,7 @@ ShaderModule *VKContext::CreateShaderModule(ShaderStage stage, ShaderLanguage la
if (shader->Compile(vulkan_, language, data, size)) {
return shader;
} else {
ERROR_LOG(G3D, "Failed to compile shader:\n%s", (const char *)data);
ERROR_LOG(G3D, "Failed to compile shader:\n%s", (const char *)LineNumberString((const char *)data).c_str());
shader->Release();
return nullptr;
}

View File

@ -28,13 +28,8 @@
#include "GPU/Common/DepalettizeShaderCommon.h"
#include "GPU/Common/DepalettizeCommon.h"
// Vertex shader inputs handed to BeginVSMain by GenerateVertexShader: a vec2 position
// and a vec2 texture coordinate.
static const InputDef vsInputs[2] = {
{ "vec2", "a_position", Draw::SEM_POSITION, },
{ "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0, },
};
// Varying passed to BeginVSMain/EndVSMain by GenerateVertexShader.
// NOTE(review): the array is declared with one element but two rows are listed, differing
// only in the name (v_texcoord0 vs v_texcoord). This looks like the before/after pair of
// a diff - confirm which single row is intended.
static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord0", Draw::SEM_TEXCOORD0, 0, "highp" },
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
static const SamplerDef samplers[2] = {
@ -56,18 +51,6 @@ void DepalShaderCache::DeviceLost() {
Clear();
}
bool DepalShaderCache::GenerateVertexShader(char *buffer, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0);
writer.BeginVSMain(vsInputs, Slice<UniformDef>::empty(), varyings);
writer.C(" v_texcoord0 = a_texcoord0;\n");
writer.C(" gl_Position = vec4(a_position, 0.0, 1.0);\n");
if (strlen(lang.viewportYSign)) {
writer.F(" gl_Position.y *= %s1.0;\n", lang.viewportYSign);
}
writer.EndVSMain(varyings);
return true;
}
Draw::Texture *DepalShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const u32 clutHash, u32 *rawClut) {
u32 clutId = GetClutID(clutFormat, clutHash);
@ -178,16 +161,19 @@ DepalShader *DepalShaderCache::GetDepalettizeShader(uint32_t clutMode, GEBufferF
char *buffer = new char[4096];
if (!vertexShader_) {
if (!GenerateVertexShader(buffer, draw_->GetShaderLanguageDesc())) {
// The vertex shader failed, no need to bother trying the fragment.
delete[] buffer;
return nullptr;
}
GenerateDepalVs(buffer, draw_->GetShaderLanguageDesc());
vertexShader_ = draw_->CreateShaderModule(ShaderStage::Vertex, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_vs");
}
// TODO: Replace with ShaderWriter-based implementation.
GenerateDepalShader(buffer, pixelFormat, draw_->GetShaderLanguageDesc().shaderLanguage);
// TODO: Parse these out of clutMode some nice way, to become a bit more stateless.
DepalConfig config;
config.clutFormat = gstate.getClutPaletteFormat();
config.startPos = gstate.getClutIndexStartPos();
config.shift = gstate.getClutIndexShift();
config.mask = gstate.getClutIndexMask();
config.pixelFormat = pixelFormat;
GenerateDepalFs(buffer, config, draw_->GetShaderLanguageDesc());
std::string src(buffer);
ShaderModule *fragShader = draw_->CreateShaderModule(ShaderStage::Fragment, draw_->GetShaderLanguageDesc().shaderLanguage, (const uint8_t *)buffer, strlen(buffer), "depal_fs");

View File

@ -43,7 +43,7 @@ public:
};
// Caches both shaders and palette textures.
class DepalShaderCache : public DepalShaderCacheCommon {
class DepalShaderCache {
public:
DepalShaderCache(Draw::DrawContext *draw);
~DepalShaderCache();
@ -62,10 +62,16 @@ public:
void DeviceLost();
void DeviceRestore(Draw::DrawContext *draw);
// Exposed for testing.
static bool GenerateVertexShader(char *buffer, const ShaderLanguageDesc &lang);
private:
// Builds a shader ID from the low 24 bits of `clutMode`, with `pixelFormat` shifted
// into bits 24 and up.
static uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) {
	const uint32_t clutBits = clutMode & 0xFFFFFF;
	return clutBits | (pixelFormat << 24);
}
// Builds a CLUT ID by XOR-ing the palette format into the CLUT hash.
// Deliberately simplistic.
static uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) {
	const uint32_t formatBits = (uint32_t)clutFormat;
	return clutHash ^ formatBits;
}
Draw::DrawContext *draw_;
Draw::ShaderModule *vertexShader_ = nullptr;
Draw::SamplerState *nearestSampler_ = nullptr;

View File

@ -17,83 +17,44 @@
#include <cstdio>
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/Shader.h"
#include "Common/GPU/ShaderWriter.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderCommon.h"
#include "Common/StringUtils.h"
#include "Common/Log.h"
#include "Core/Reporting.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#define WRITE p+=sprintf
// Vertex shader inputs passed to BeginVSMain by GenerateDepalVs: a vec2 position and a
// vec2 texture coordinate.
static const InputDef vsInputs[2] = {
{ "vec2", "a_position", Draw::SEM_POSITION, },
{ "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0, },
};
// TODO: Deduplicate with DepalettizeCommon.cpp
// Samplers declared by GenerateDepalFs through DeclareSamplers. The fragment shader
// samples "tex" to fetch the source pixel and "pal" to look up the output color.
static const SamplerDef samplers[2] = {
{ "tex" },
{ "pal" },
};
// The single varying shared by both stages: written as v_texcoord in GenerateDepalVs and
// used as the texture coordinate when sampling "tex" in the fragment shader.
static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// Uses integer instructions available since OpenGL 3.0. Suitable for ES 3.0 as well.
void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
char *p = buffer;
if (language == HLSL_D3D11) {
WRITE(p, "SamplerState texSamp : register(s0);\n");
WRITE(p, "Texture2D<float4> tex : register(t0);\n");
WRITE(p, "Texture2D<float4> pal : register(t1);\n");
// Support for depth.
if (pixelFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
WRITE(p, "static const float z_scale = %f;\n", factors.scale);
WRITE(p, "static const float z_offset = %f;\n", factors.offset);
}
} else if (language == GLSL_VULKAN) {
WRITE(p, "#version 450\n");
WRITE(p, "#extension GL_ARB_separate_shader_objects : enable\n");
WRITE(p, "#extension GL_ARB_shading_language_420pack : enable\n");
WRITE(p, "layout(set = 0, binding = 1) uniform sampler2D tex;\n");
WRITE(p, "layout(set = 0, binding = 2) uniform sampler2D pal;\n");
WRITE(p, "layout(location = 0) in vec2 v_texcoord0;\n");
WRITE(p, "layout(location = 0) out vec4 fragColor0;\n");
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
const int shift = config.shift;
const int mask = config.mask;
// Support for depth.
if (pixelFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
WRITE(p, "const float z_scale = %f;\n", factors.scale);
WRITE(p, "const float z_offset = %f;\n", factors.offset);
}
} else {
if (gl_extensions.IsGLES) {
WRITE(p, "#version 300 es\n");
WRITE(p, "precision mediump float;\n");
WRITE(p, "precision highp int;\n");
} else {
WRITE(p, "#version %d\n", gl_extensions.GLSLVersion());
}
WRITE(p, "in vec2 v_texcoord0;\n");
WRITE(p, "out vec4 fragColor0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");
if (pixelFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
WRITE(p, "const float z_scale = %f;\n", factors.scale);
WRITE(p, "const float z_offset = %f;\n", factors.offset);
}
if (config.pixelFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset);
}
if (language == HLSL_D3D11) {
WRITE(p, "float4 main(in float2 v_texcoord0 : TEXCOORD0) : SV_Target {\n");
WRITE(p, " float4 color = tex.Sample(texSamp, v_texcoord0);\n");
} else {
WRITE(p, "void main() {\n");
WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n");
}
int mask = gstate.getClutIndexMask();
int shift = gstate.getClutIndexShift();
int offset = gstate.getClutIndexStartPos();
GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
// Sampling turns our texture into floating point. To avoid this, might be able
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
// Anyhow, we simply work around this by converting back to integer, which is fine.
@ -107,87 +68,83 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such
// as those that Test Drive uses for its color remapping. But would need game specific flagging.
writer.C(" vec4 color = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
int shiftedMask = mask << shift;
switch (pixelFormat) {
switch (config.pixelFormat) {
case GE_FORMAT_8888:
if (shiftedMask & 0xFF) WRITE(p, " int r = int(color.r * 255.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0xFF00) WRITE(p, " int g = int(color.g * 255.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xFF0000) WRITE(p, " int b = int(color.b * 255.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0xFF000000) WRITE(p, " int a = int(color.a * 255.99);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");
if (shiftedMask & 0xFF0000) writer.C(" int b = int(color.b * 255.99);\n"); else writer.C(" int b = 0;\n");
if (shiftedMask & 0xFF000000) writer.C(" int a = int(color.a * 255.99);\n"); else writer.C(" int a = 0;\n");
writer.C(" int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
break;
case GE_FORMAT_4444:
if (shiftedMask & 0xF) WRITE(p, " int r = int(color.r * 15.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0xF0) WRITE(p, " int g = int(color.g * 15.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xF00) WRITE(p, " int b = int(color.b * 15.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0xF000) WRITE(p, " int a = int(color.a * 15.99);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
if (shiftedMask & 0xF) writer.C(" int r = int(color.r * 15.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0xF0) writer.C(" int g = int(color.g * 15.99);\n"); else writer.C(" int g = 0;\n");
if (shiftedMask & 0xF00) writer.C(" int b = int(color.b * 15.99);\n"); else writer.C(" int b = 0;\n");
if (shiftedMask & 0xF000) writer.C(" int a = int(color.a * 15.99);\n"); else writer.C(" int a = 0;\n");
writer.C(" int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
break;
case GE_FORMAT_565:
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0x7E0) WRITE(p, " int g = int(color.g * 63.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0xF800) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
WRITE(p, " int index = (b << 11) | (g << 5) | (r);\n");
if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0x7E0) writer.C(" int g = int(color.g * 63.99);\n"); else writer.C(" int g = 0;\n");
if (shiftedMask & 0xF800) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");
writer.C(" int index = (b << 11) | (g << 5) | (r);\n");
break;
case GE_FORMAT_5551:
if (shiftedMask & 0x1F) WRITE(p, " int r = int(color.r * 31.99);\n"); else WRITE(p, " int r = 0;\n");
if (shiftedMask & 0x3E0) WRITE(p, " int g = int(color.g * 31.99);\n"); else WRITE(p, " int g = 0;\n");
if (shiftedMask & 0x7C00) WRITE(p, " int b = int(color.b * 31.99);\n"); else WRITE(p, " int b = 0;\n");
if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n");
if (shiftedMask & 0x3E0) writer.C(" int g = int(color.g * 31.99);\n"); else writer.C(" int g = 0;\n");
if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");
if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n");
writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
break;
case GE_FORMAT_DEPTH16:
// Remap depth buffer.
WRITE(p, " float depth = (color.x - z_offset) * z_scale;\n");
WRITE(p, " int index = int(clamp(depth, 0.0, 65535.0));\n");
writer.C(" float depth = (color.x - z_offset) * z_scale;\n");
writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n");
break;
default:
break;
}
float texturePixels = 256;
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512;
float texturePixels = 256.0f;
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.0f;
}
if (shift) {
WRITE(p, " index = (int(uint(index) >> uint(%i)) & 0x%02x)", shift, mask);
writer.F(" index = (int(uint(index) >> uint(%d)) & 0x%02x)", shift, mask);
} else {
WRITE(p, " index = (index & 0x%02x)", mask);
writer.F(" index = (index & 0x%02x)", mask);
}
if (offset) {
WRITE(p, " | %i;\n", offset); // '|' matches what we have in gstate.h
if (config.startPos) {
writer.F(" | %d;\n", config.startPos); // '|' matches what we have in gstate.h
} else {
WRITE(p, ";\n");
writer.F(";\n");
}
if (language == HLSL_D3D11) {
WRITE(p, " return pal.Load(int3(index, 0, 0));\n");
} else {
WRITE(p, " fragColor0 = texture(pal, vec2((float(index) + 0.5) * (1.0 / %f), 0.0));\n", texturePixels);
}
WRITE(p, "}\n");
writer.F(" vec2 uv = vec2((float(index) + 0.5) * %f, 0.0);\n", 1.0f / texturePixels);
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "uv").C(";\n");
}
// FP only, to suit GL(ES) 2.0
void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage lang) {
char *p = buffer;
const char *modFunc = lang == HLSL_D3D9 ? "fmod" : "mod";
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
char lookupMethod[128] = "index.r";
char offset[128] = "";
const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
const u32 clutBase = gstate.getClutIndexStartPos();
const int shift = config.shift;
const int mask = config.mask;
const int shift = gstate.getClutIndexShift();
const int mask = gstate.getClutIndexMask();
if (config.pixelFormat == GE_FORMAT_DEPTH16) {
DepthScaleFactors factors = GetDepthScaleFactors();
writer.ConstFloat("z_scale", factors.scale);
writer.ConstFloat("z_offset", factors.offset);
}
float index_multiplier = 1.0f;
// pixelformat is the format of the texture we are sampling.
bool formatOK = true;
switch (pixelFormat) {
switch (config.pixelFormat) {
case GE_FORMAT_8888:
if ((mask & (mask + 1)) == 0) {
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
@ -196,7 +153,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
if (rgba_shift == 0 && mask == 0xFF) {
sprintf(lookupMethod, "index.%c", rgba[shift]);
} else {
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
// Format was OK if there weren't bits from another component.
formatOK = mask <= 255 - (1 << rgba_shift);
@ -214,7 +171,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
index_multiplier = 15.0f / 256.0f;
} else {
// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= 15 - (1 << rgba_shift);
}
@ -234,7 +191,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
} else {
// We just need to divide the right component by the right value, and then mod against the mask.
// A common case is shift=1, mask=0f.
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
}
@ -254,7 +211,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
index_multiplier = 1.0f / 256.0f;
} else {
// A isn't possible here.
sprintf(lookupMethod, "%s(index.%c * %f, %d.0)", modFunc, rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
sprintf(lookupMethod, "fmod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
index_multiplier = 1.0f / 256.0f;
formatOK = mask <= 31 - (1 << rgba_shift);
}
@ -265,9 +222,14 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
case GE_FORMAT_DEPTH16:
{
// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.
if ((mask & (mask + 1)) == 0 && shift < 16) {
if (shift < 16) {
index_multiplier = 1.0f / (float)(1 << shift);
truncate_cpy(lookupMethod, "index.r");
truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)");
if ((mask & (mask + 1)) != 0) {
// But we'll try with the above anyway.
formatOK = false;
}
} else {
formatOK = false;
}
@ -278,7 +240,7 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
}
float texturePixels = 256.f;
if (clutFormat != GE_CMODE_32BIT_ABGR8888) {
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.f;
index_multiplier *= 0.5f;
}
@ -287,69 +249,50 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
// index_multiplier -= 0.01f / texturePixels;
if (!formatOK) {
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", pixelFormat, shift, mask, clutBase);
ERROR_LOG_REPORT_ONCE(depal, G3D, "%i depal unsupported: shift=%i mask=%02x offset=%d", config.pixelFormat, shift, mask, config.startPos);
}
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
// Technically, the clutBase should be |'d, not added, but that's hard with floats.
float texel_offset = ((float)clutBase + 0.5f) / texturePixels;
float texel_offset = ((float)config.startPos + 0.5f) / texturePixels;
char offset[128] = "";
sprintf(offset, " + %f", texel_offset);
if (lang == GLSL_1xx) {
if (gl_extensions.IsGLES) {
WRITE(p, "#version 100\n");
WRITE(p, "precision mediump float;\n");
} else {
WRITE(p, "#version %d\n", gl_extensions.GLSLVersion());
if (gl_extensions.VersionGEThan(3, 0, 0)) {
WRITE(p, "#define gl_FragColor fragColor0\n");
WRITE(p, "out vec4 fragColor0;\n");
}
}
WRITE(p, "varying vec2 v_texcoord0;\n");
WRITE(p, "uniform sampler2D tex;\n");
WRITE(p, "uniform sampler2D pal;\n");
WRITE(p, "void main() {\n");
WRITE(p, " vec4 index = texture2D(tex, v_texcoord0);\n");
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
WRITE(p, " gl_FragColor = texture2D(pal, vec2(coord, 0.0));\n");
WRITE(p, "}\n");
} else if (lang == HLSL_D3D9) {
WRITE(p, "sampler tex: register(s0);\n");
WRITE(p, "sampler pal: register(s1);\n");
WRITE(p, "float4 main(float2 v_texcoord0 : TEXCOORD0) : COLOR0 {\n");
WRITE(p, " float4 index = tex2D(tex, v_texcoord0);\n");
WRITE(p, " float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
WRITE(p, " return tex2D(pal, float2(coord, 0.0));\n");
WRITE(p, "}\n");
}
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
writer.F(" float coord = (%s * %f)%s;\n", lookupMethod, index_multiplier, offset);
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language) {
switch (language) {
case GLSL_1xx:
GenerateDepalShaderFloat(buffer, pixelFormat, language);
break;
case GLSL_3xx:
case GLSL_VULKAN:
case HLSL_D3D11:
GenerateDepalShader300(buffer, pixelFormat, language);
break;
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0);
writer.DeclareSamplers(samplers);
writer.HighPrecisionFloat();
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings);
switch (lang.shaderLanguage) {
case HLSL_D3D9:
GenerateDepalShaderFloat(buffer, pixelFormat, language);
case GLSL_1xx:
GenerateDepalShaderFloat(writer, config, lang);
break;
case GLSL_VULKAN:
case GLSL_3xx:
case HLSL_D3D11:
GenerateDepalShader300(writer, config, lang);
break;
default:
_assert_msg_(false, "Depal shader language not supported: %d", (int)language);
_assert_msg_(false, "Depal shader language not supported: %d", (int)lang.shaderLanguage);
}
writer.EndFSMain("outColor");
}
// Builds a shader ID from the low 24 bits of `clutMode`, with `pixelFormat` shifted
// into bits 24 and up.
uint32_t DepalShaderCacheCommon::GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) const {
	const uint32_t clutBits = clutMode & 0xFFFFFF;
	return clutBits | (pixelFormat << 24);
}
uint32_t DepalShaderCacheCommon::GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) const {
// Simplistic.
return clutHash ^ (uint32_t)clutFormat;
void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang) {
ShaderWriter writer(buffer, lang, ShaderStage::Vertex, nullptr, 0);
writer.BeginVSMain(vsInputs, Slice<UniformDef>::empty(), varyings);
writer.C(" v_texcoord = a_texcoord0;\n");
writer.C(" gl_Position = vec4(a_position, 0.0, 1.0);\n");
if (strlen(lang.viewportYSign)) {
writer.F(" gl_Position.y *= %s1.0;\n", lang.viewportYSign);
}
writer.EndVSMain(varyings);
}
#undef WRITE

View File

@ -24,13 +24,13 @@
static const int DEPAL_TEXTURE_OLD_AGE = 120;
void GenerateDepalShader(char *buffer, GEBufferFormat pixelFormat, ShaderLanguage language);
class DepalShaderCacheCommon {
public:
virtual ~DepalShaderCacheCommon() {}
protected:
uint32_t GenerateShaderID(uint32_t clutMode, GEBufferFormat pixelFormat) const;
uint32_t GetClutID(GEPaletteFormat clutFormat, uint32_t clutHash) const;
// Inputs that select which depal fragment shader GenerateDepalFs produces.
struct DepalConfig {
// Mask applied to the index after shifting.
int mask;
// Right-shift applied to the index before masking.
int shift;
// Start position OR-ed into the index by the integer shader; the float shader adds it
// as a texel offset instead.
u32 startPos;
// Palette (CLUT) format; the generators use a 256-texel palette for
// GE_CMODE_32BIT_ABGR8888 and 512 otherwise.
GEPaletteFormat clutFormat;
// Format of the texture being sampled as the index source.
GEBufferFormat pixelFormat;
};
void GenerateDepalFs(char *buffer, const DepalConfig &config, const ShaderLanguageDesc &lang);
void GenerateDepalVs(char *buffer, const ShaderLanguageDesc &lang);

View File

@ -35,12 +35,15 @@ static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// The one sampler GenerateDraw2DFs declares via DeclareSamplers; it is sampled as "tex".
static const SamplerDef samplers[1] = {
{ "tex" },
};
// Writes a fragment shader into `buffer` that samples "tex" at v_texcoord.xy and outputs
// the sampled value as outColor. `bugs` is not read in this function.
// NOTE(review): this body contains what look like before/after pairs of a diff: the
// explicit DeclareSampler2D/DeclareTexture2D calls next to DeclareSamplers, and two
// `outColor` lines using the 3-argument and 2-argument SampleTexture2D forms. Confirm
// which variant is intended before relying on it.
void GenerateDraw2DFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0);
writer.DeclareSampler2D("samp", 0);
writer.DeclareTexture2D("tex", 0);
writer.DeclareSamplers(samplers);
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings);
writer.C(" vec4 outColor = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n");
writer.C(" vec4 outColor = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.EndFSMain("outColor");
}

View File

@ -10,6 +10,10 @@ static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// The one sampler the reinterpret fragment shader declares via DeclareSamplers; it is
// sampled as "tex".
static const SamplerDef samplers[1] = {
{ "tex" }
};
// TODO: We could possibly have an option to preserve any extra color precision? But gonna start without it.
// Requires full size integer math. It would be possible to make a floating point-only version with lots of
// modulo and stuff, might do it one day.
@ -22,12 +26,11 @@ bool GenerateReinterpretFragmentShader(char *buffer, GEBufferFormat from, GEBuff
writer.HighPrecisionFloat();
writer.DeclareSampler2D("samp", 0);
writer.DeclareTexture2D("tex", 0);
writer.DeclareSamplers(samplers);
writer.BeginFSMain(Slice<UniformDef>::empty(), varyings);
writer.C(" vec4 val = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n");
writer.C(" vec4 val = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
switch (from) {
case GE_FORMAT_4444:

View File

@ -79,12 +79,14 @@ static const VaryingDef varyings[1] = {
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
// The one sampler GenerateStencilFs declares via DeclareSamplers; it is sampled as "tex".
static const SamplerDef samplers[1] = {
{ "tex" },
};
void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs) {
ShaderWriter writer(buffer, lang, ShaderStage::Fragment, nullptr, 0);
writer.HighPrecisionFloat();
writer.DeclareSampler2D("samp", 0);
writer.DeclareTexture2D("tex", 0);
writer.DeclareSamplers(samplers);
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
writer.C("layout (depth_unchanged) out float gl_FragDepth;\n");
@ -94,7 +96,7 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
writer.BeginFSMain(uniforms, varyings);
writer.C(" vec4 index = ").SampleTexture2D("tex", "samp", "v_texcoord.xy").C(";\n");
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
writer.C(" vec4 outColor = index.aaaa;\n"); // Only care about a.
writer.C(" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n");
// Bitwise operations on floats, ugh.

View File

@ -1897,7 +1897,6 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
draw_->InvalidateCachedState();
InvalidateLastTexture();
return;
}
@ -1922,10 +1921,10 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
shaderApply.Use();
draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
draw_->BindTexture(1, clutTexture);
Draw::SamplerState *nearest = depalShaderCache_->GetSampler();
draw_->BindSamplerStates(0, 1, &nearest);
draw_->BindSamplerStates(1, 1, &nearest);
draw_->BindTexture(1, clutTexture);
shaderApply.Shade();
draw_->BindTexture(0, nullptr);

View File

@ -244,7 +244,7 @@ UI::EventReturn CwCheatScreen::OnImportCheat(UI::EventParams &params) {
if (line[0] == '_' && (line[1] == 'S' || line[1] == 'G') && title.size() < 2) {
title.push_back(line);
} else if (parseCheatEntry && (line[0] == '_' && (line[1] == 'C' || line[1] == 'L')) || line[0] == '/' || line[0] == '#') {
} else if (parseCheatEntry && ((line[0] == '_' && (line[1] == 'C' || line[1] == 'L')) || line[0] == '/' || line[0] == '#')) {
newList.push_back(line);
}
}

View File

@ -14,6 +14,7 @@
#include "GPU/Common/VertexShaderGenerator.h"
#include "GPU/Common/ReinterpretFramebuffer.h"
#include "GPU/Common/StencilCommon.h"
#include "GPU/Common/DepalettizeShaderCommon.h"
#if PPSSPP_PLATFORM(WINDOWS)
#include "GPU/D3D11/D3D11Util.h"
@ -272,6 +273,61 @@ bool TestStencilShaders() {
return !failed;
}
bool TestDepalShaders() {
Draw::Bugs bugs;
ShaderLanguage languages[] = {
#if PPSSPP_PLATFORM(WINDOWS)
ShaderLanguage::HLSL_D3D9,
ShaderLanguage::HLSL_D3D11,
#endif
ShaderLanguage::GLSL_VULKAN,
ShaderLanguage::GLSL_3xx,
ShaderLanguage::GLSL_1xx,
};
char *buffer = new char[65536];
bool failed = false;
for (int k = 0; k < ARRAY_SIZE(languages); k++) {
printf("=== %s ===\n\n", ShaderLanguageToString(languages[k]));
ShaderLanguageDesc desc(languages[k]);
std::string errorMessage;
// TODO: Try some different configurations of the fragment shader.
// But first just try one.
DepalConfig config{};
config.clutFormat = GE_CMODE_16BIT_ABGR4444;
config.shift = 8;
config.startPos = 64;
config.mask = 0xFF;
config.pixelFormat = GE_FORMAT_8888;
GenerateDepalFs(buffer, config, desc);
if (!TestCompileShader(buffer, languages[k], ShaderStage::Fragment, &errorMessage)) {
printf("Error compiling depal shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str());
failed = true;
return false;
} else {
printf("===\n%s\n===\n", buffer);
}
GenerateDepalVs(buffer, desc);
if (!TestCompileShader(buffer, languages[k], ShaderStage::Vertex, &errorMessage)) {
printf("Error compiling depal shader:\n\n%s\n\n%s\n", LineNumberString(buffer).c_str(), errorMessage.c_str());
failed = true;
return false;
} else {
printf("===\n%s\n===\n", buffer);
}
}
delete[] buffer;
return !failed;
}
const ShaderLanguage languages[] = {
#if PPSSPP_PLATFORM(WINDOWS)
ShaderLanguage::HLSL_D3D9,
@ -427,6 +483,10 @@ bool TestShaderGenerators() {
return false;
}
if (!TestDepalShaders()) {
return false;
}
if (!TestFragmentShaders()) {
return false;
}