From 795ddf16e59f100c2509c5402f923e52aeacdca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 8 Dec 2017 15:18:37 +0100 Subject: [PATCH] Mipmaps: Use a uniform to specify the texture lod in SLOPE and CONST modes. Fixes the mipmap test for D3D11, Vulkan and modern GL properly, while keeping Tony Hawk games working. Also implements increasing the lod level with the render resolution properly. --- GPU/Common/FramebufferCommon.cpp | 6 ++- GPU/Common/ShaderCommon.h | 6 +-- GPU/Common/ShaderId.cpp | 5 +++ GPU/Common/ShaderId.h | 1 + GPU/Common/ShaderUniforms.cpp | 23 +++++++++++- GPU/Common/ShaderUniforms.h | 16 ++++---- GPU/Common/TextureCacheCommon.cpp | 39 +++++++++----------- GPU/D3D11/GPU_D3D11.cpp | 1 + GPU/Directx9/PixelShaderGeneratorDX9.cpp | 6 ++- GPU/GLES/FragmentShaderGeneratorGLES.cpp | 23 ++++++++++-- GPU/GLES/GPU_GLES.cpp | 7 +++- GPU/GLES/ShaderManagerGLES.cpp | 26 +++++++++++++ GPU/GLES/ShaderManagerGLES.h | 1 + GPU/GLES/TextureCacheGLES.cpp | 32 +++++++++++----- GPU/GPUCommon.cpp | 16 +++++--- GPU/GPUState.h | 6 ++- GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp | 12 +++++- GPU/Vulkan/GPU_Vulkan.cpp | 1 + GPU/Vulkan/ShaderManagerVulkan.cpp | 1 + Windows/GPU/WindowsVulkanContext.cpp | 2 +- ext/native/math/math_util.h | 29 +++++++++++++++ headless/Headless.cpp | 2 +- test.py | 2 +- 23 files changed, 203 insertions(+), 60 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index ffeaaf46a..625138bc1 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -518,7 +518,11 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame gstate_c.curRTHeight = vfb->height; gstate_c.curRTRenderWidth = vfb->renderWidth; gstate_c.curRTRenderHeight = vfb->renderHeight; - gstate_c.curRTScale = (float)vfb->renderWidth / (float)vfb->width; + float newRTScale = (float)vfb->renderWidth / (float)vfb->width; + if (gstate_c.curRTScale != newRTScale) { + 
gstate_c.curRTScale = newRTScale; + gstate_c.Dirty(DIRTY_TEXLOD); + } return vfb; } diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index 6e4bfa8c8..cf5351eb7 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -85,15 +85,15 @@ enum : uint64_t { DIRTY_BONEMATRIX6 = 1ULL << 30, DIRTY_BONEMATRIX7 = 1ULL << 31, - // These are for hardware tessellation - DIRTY_BEZIERSPLINE = 1ULL << 32, + DIRTY_BEZIERSPLINE = 1ULL << 32, // For hardware tessellation DIRTY_TEXCLAMP = 1ULL << 33, + DIRTY_TEXLOD = 1ULL << 34, // space for 7 more uniforms. DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x3FFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 3a8f0de37..9447e5083 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -166,6 +167,7 @@ std::string FragmentShaderDesc(const ShaderID &id) { if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj "; if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha "; if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs "; + if (id.Bit(FS_BIT_TEXLOD)) desc << "TexLod "; if (id.Bit(FS_BIT_LMODE)) desc << "LM "; if (id.Bit(FS_BIT_ENABLE_FOG)) desc << "Fog "; if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x "; @@ -258,6 +260,9 @@ void ComputeFragmentShaderID(ShaderID *id_out) { id.SetBit(FS_BIT_TEXTURE_AT_OFFSET, textureAtOffset); } id.SetBit(FS_BIT_BGRA_TEXTURE, gstate_c.bgraTexture); + if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && gstate_c.Supports(GPU_SUPPORTS_EXPLICIT_LOD)) { + id.SetBit(FS_BIT_TEXLOD); + } } id.SetBit(FS_BIT_LMODE, lmode); diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index afc7c5131..eece142c8 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -86,6 +86,7 @@ enum { FS_BIT_BLENDFUNC_B = 42, // 4 bits 
FS_BIT_FLATSHADE = 46, FS_BIT_BGRA_TEXTURE = 47, + FS_BIT_TEXLOD = 48, // 48+ are free. }; diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index a05757496..4050a9535 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -85,7 +85,6 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView if (g_Config.iRenderingMode == 0 && g_display_rotation != DisplayRotation::ROTATE_0) { flippedMatrix = flippedMatrix * g_display_rot_matrix; } - CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr()); } @@ -141,6 +140,28 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView ub->stencil = (float)gstate.getStencilTestRef() / 255.0; } + if (dirtyUniforms & DIRTY_TEXLOD) { // Recompute the explicit texture LOD from the level mode, the level offset and the render scale. + switch (gstate.getTexLevelMode()) { + case GE_TEXLEVEL_MODE_CONST: { + float scaleLog = TexLog2F((float)gstate_c.curRTScale); + float bias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f); + ub->texLod = bias + scaleLog; + break; + } + case GE_TEXLEVEL_MODE_SLOPE: + case GE_TEXLEVEL_MODE_UNKNOWN: { + float scaleLog = TexLog2F((float)gstate_c.curRTScale); + float slopeLog = TexLog2F(fabsf(gstate.getTextureLodSlope())); + float bias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f); + ub->texLod = bias + slopeLog + scaleLog + 1.0f; // It's unclear where the extra 1.0f bias comes from... 
+ break; + } + default: + ub->texLod = 0; + break; + } + } + // Note - this one is not in lighting but in transformCommon as it has uses beyond lighting if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) { Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA()); diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 1dc506518..7ededdece 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -8,13 +8,13 @@ enum : uint64_t { DIRTY_BASE_UNIFORMS = - DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | - DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | - DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | - DIRTY_BEZIERSPLINE, + DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | + DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | + DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | + DIRTY_BEZIERSPLINE | DIRTY_TEXLOD, DIRTY_LIGHT_UNIFORMS = - DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 | - DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT, + DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 | + DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT, }; // TODO: Split into two structs, one for software transform and one for hardware transform, to save space. 
@@ -28,7 +28,7 @@ struct UB_VS_FS_Base { float tex[12]; float uvScaleOffset[4]; float depthRange[4]; - float fogCoef[2]; float stencil; float pad0; + float fogCoef[2]; float stencil; float texLod; float matAmbient[4]; int spline_count_u; int spline_count_v; int spline_type_u; int spline_type_v; // Fragment data @@ -52,6 +52,7 @@ R"( mat4 proj_mtx; vec4 depthRange; vec2 fogcoef; float stencilReplace; + float texLod; vec4 matambientalpha; int spline_count_u; int spline_count_v; @@ -78,6 +79,7 @@ R"( float4x4 u_proj; float4 u_depthRange; float2 u_fogcoef; float u_stencilReplaceValue; + float u_texLod; float4 u_matambientalpha; int u_spline_count_u; int u_spline_count_v; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index ffec95f59..5bd3a0f6d 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -16,6 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include +#include "math/math_util.h" #include "Common/ColorConv.h" #include "Common/MemoryUtil.h" #include "Core/Config.h" @@ -127,21 +128,6 @@ int TextureCacheCommon::AttachedDrawingHeight() { return 0; } -// Produces a signed 1.23.8 value. -static int TexLog2(float delta) { - union FloatBits { - float f; - u32 u; - }; - FloatBits f; - f.f = delta; - // Use the exponent as the tex level, and the top mantissa bits for a frac. - // We can't support more than 8 bits of frac, so truncate. - int useful = (f.u >> 15) & 0xFFFF; - // Now offset so the exponent aligns with log2f (exp=127 is 0.) 
- return useful - 127 * 256; -} - void TextureCacheCommon::GetSamplingParams(int &minFilt, int &magFilt, bool &sClamp, bool &tClamp, float &lodBias, int maxLevel, u32 addr, GETexLevelMode &mode) { minFilt = gstate.texfilter & 0x7; magFilt = gstate.isMagnifyFilteringEnabled(); @@ -151,13 +137,15 @@ void TextureCacheCommon::GetSamplingParams(int &minFilt, int &magFilt, bool &sCl GETexLevelMode mipMode = gstate.getTexLevelMode(); mode = mipMode; bool autoMip = mipMode == GE_TEXLEVEL_MODE_AUTO; - lodBias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f); - if (mipMode == GE_TEXLEVEL_MODE_SLOPE) { - lodBias += 1.0f + TexLog2(gstate.getTextureLodSlope()) * (1.0f / 256.0f); + if (autoMip) { + float scaleLog = TexLog2F((float)gstate_c.curRTScale); + lodBias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f) + scaleLog; + } else { + lodBias = 0; // We use a uniform instead to look up the level explicitly. } // If mip level is forced to zero, disable mipmapping. - bool noMip = maxLevel == 0 || (!autoMip && lodBias <= 0.0f); + bool noMip = maxLevel == 0; if (IsFakeMipmapChange()) noMip = noMip || !autoMip; @@ -229,9 +217,16 @@ void TextureCacheCommon::UpdateSamplingParams(TexCacheEntry &entry, SamplerCache break; case GE_TEXLEVEL_MODE_CONST: case GE_TEXLEVEL_MODE_UNKNOWN: - key.maxLevel = (int)(lodBias * 256.0f); - key.minLevel = (int)(lodBias * 256.0f); - key.lodBias = 0; + if (gstate_c.Supports(GPU_SUPPORTS_EXPLICIT_LOD)) { + // We handle this in the shader, no restrictions. + key.maxLevel = entry.maxLevel * 256; + key.minLevel = 0; + key.lodBias = 0; + } else { + key.maxLevel = (int)(lodBias * 256.0f); + key.minLevel = (int)(lodBias * 256.0f); + key.lodBias = 0; + } break; case GE_TEXLEVEL_MODE_SLOPE: // It's incorrect to use the slope as a bias. 
Instead it should be passed diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index 6fae08905..f4f9516c1 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -224,6 +224,7 @@ void GPU_D3D11::CheckGPUFeatures() { features |= GPU_SUPPORTS_INSTANCE_RENDERING; features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; features |= GPU_SUPPORTS_FBO; + features |= GPU_SUPPORTS_EXPLICIT_LOD; uint32_t fmt4444 = draw_->GetDataFormatSupport(Draw::DataFormat::A4R4G4B4_UNORM_PACK16); uint32_t fmt1555 = draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16); diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index 73bd6165e..e675b952a 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -218,7 +218,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag if (doTextureProjection) { WRITE(p, " float4 t = tex.Sample(samp, In.v_texcoord.xy / In.v_texcoord.z)%s;\n", bgraTexture ? ".bgra" : ""); } else { - WRITE(p, " float4 t = tex.Sample(samp, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : ""); + if (id.Bit(FS_BIT_TEXLOD)) { + WRITE(p, " float4 t = tex.SampleLevel(samp, %s.xy, u_texLod)%s;\n", texcoord, bgraTexture ? ".bgra" : ""); + } else { + WRITE(p, " float4 t = tex.Sample(samp, %s.xy)%s;\n", texcoord, bgraTexture ? 
".bgra" : ""); + } } } else { if (doTextureProjection) { diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.cpp b/GPU/GLES/FragmentShaderGeneratorGLES.cpp index c1994b8e5..3e3f5dfec 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp +++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp @@ -47,7 +47,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform const char *fragColor0 = "gl_FragColor"; const char *fragColor1 = "fragColor1"; const char *texture = "texture2D"; - const char *texelFetch = NULL; + const char *textureLod = nullptr; + const char *texelFetch = nullptr; bool highpFog = false; bool highpTexcoord = false; bool bitwiseOps = false; @@ -61,6 +62,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform WRITE(p, "#version 300 es\n"); // GLSL ES 3.0 fragColor0 = "fragColor0"; texture = "texture"; + textureLod = "textureLod"; glslES30 = true; bitwiseOps = true; texelFetch = "texelFetch"; @@ -113,12 +115,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform if (gl_extensions.VersionGEThan(3, 3, 0)) { fragColor0 = "fragColor0"; texture = "texture"; + textureLod = "textureLod"; glslES30 = true; bitwiseOps = true; texelFetch = "texelFetch"; WRITE(p, "#version 330\n"); } else if (gl_extensions.VersionGEThan(3, 0, 0)) { fragColor0 = "fragColor0"; + textureLod = "textureLod"; bitwiseOps = true; texelFetch = "texelFetch"; WRITE(p, "#version 130\n"); @@ -164,6 +168,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3); bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET); + bool texLod = id.Bit(FS_BIT_TEXLOD); ReplaceBlendType replaceBlend = static_cast(id.Bits(FS_BIT_REPLACE_BLEND, 3)); @@ -235,6 +240,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform WRITE(p, "uniform vec3 u_fogcolor;\n"); WRITE(p, "%s %s float v_fogdepth;\n", varying, highpFog ? 
"highp" : "mediump"); } + if (texLod) { + WRITE(p, "uniform float u_texlod;\n"); + *uniformMask |= DIRTY_TEXLOD; + } if (doTexture) { WRITE(p, "%s %s vec3 v_texcoord;\n", varying, highpTexcoord ? "highp" : "mediump"); } @@ -335,9 +344,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform } if (doTextureProjection) { - WRITE(p, " vec4 t = %sProj(tex, %s);\n", texture, texcoord); + if (texLod) { + WRITE(p, " vec4 t = textureProjLod(tex, %s, u_texlod);\n", texcoord); // GLSL names this textureProjLod; "%sProj" with textureLod would emit the nonexistent textureLodProj. + } else { + WRITE(p, " vec4 t = %sProj(tex, %s);\n", texture, texcoord); + } } else { - WRITE(p, " vec4 t = %s(tex, %s.xy);\n", texture, texcoord); + if (texLod) { + WRITE(p, " vec4 t = %s(tex, %s.xy, u_texlod);\n", textureLod, texcoord); + } else { + WRITE(p, " vec4 t = %s(tex, %s.xy);\n", texture, texcoord); + } } WRITE(p, " vec4 p = v_color0;\n"); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 9624c4125..861088512 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -291,8 +291,13 @@ void GPU_GLES::CheckGPUFeatures() { if (!gl_extensions.IsGLES) features |= GPU_SUPPORTS_LOGIC_OP; - if (gl_extensions.GLES3 || !gl_extensions.IsGLES) + if (gl_extensions.GLES3 || !gl_extensions.IsGLES) { features |= GPU_SUPPORTS_TEXTURE_LOD_CONTROL; + } + + if (gl_extensions.GLES3) { + features |= GPU_SUPPORTS_EXPLICIT_LOD; + } if (gl_extensions.EXT_texture_filter_anisotropic) features |= GPU_SUPPORTS_ANISOTROPY; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index b83f49fb6..8d3134cdd 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -220,6 +220,7 @@ LinkedShader::LinkedShader(VShaderID VSID, Shader *vs, FShaderID FSID, Shader *f u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); u_texclamp = glGetUniformLocation(program, "u_texclamp"); u_texclampoff = glGetUniformLocation(program, "u_texclampoff"); + u_texlod = glGetUniformLocation(program, "u_texlod"); for (int i = 0; i < 
4; i++) { char temp[64]; @@ -586,6 +587,31 @@ void LinkedShader::UpdateUniforms(u32 vertType, const VShaderID &vsid) { if (dirty & DIRTY_STENCILREPLACEVALUE) { glUniform1f(u_stencilReplaceValue, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); } + + if ((dirty & DIRTY_TEXLOD) && gstate_c.Supports(GPU_SUPPORTS_EXPLICIT_LOD) && u_texlod != -1) { // glGetUniformLocation yields -1 when the uniform is absent; location 0 is valid. + float lod = 0.0; + switch (gstate.getTexLevelMode()) { + case GE_TEXLEVEL_MODE_CONST: { + float scaleLog = TexLog2F((float)gstate_c.curRTScale); + float bias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f); + lod = bias + scaleLog; + break; + } + case GE_TEXLEVEL_MODE_SLOPE: + case GE_TEXLEVEL_MODE_UNKNOWN: { + float scaleLog = TexLog2F((float)gstate_c.curRTScale); + float slopeLog = TexLog2F(fabsf(gstate.getTextureLodSlope())); + float bias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f); + lod = bias + slopeLog + scaleLog + 1.0f; // The 1.0f bias is unclear where it's from... + break; + } + default: + lod = 0; + break; + } + glUniform1f(u_texlod, lod); + } + // TODO: Could even set all bones in one go if they're all dirty. 
#ifdef USE_BONE_ARRAY if (u_bone != -1) { diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index fe9c0226d..27c4601a0 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -95,6 +95,7 @@ public: int u_uvscaleoffset; int u_texclamp; int u_texclampoff; + int u_texlod; // Lighting int u_ambient; diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index ebdab10e9..c357e8610 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -144,17 +144,31 @@ void TextureCacheGLES::UpdateSamplingParams(TexCacheEntry &entry, bool force) { glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, 0); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); } else if (mode == GE_TEXLEVEL_MODE_CONST) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, std::max(0.0f, std::min((float)maxLevel, lodBias))); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, std::max(0.0f, std::min((float)maxLevel, lodBias))); - } else { // mode == GE_TEXLEVEL_MODE_SLOPE) { - // It's incorrect to use the slope as a bias. Instead it should be passed - // into the shader directly as an explicit lod level, with the bias on top. For now, we just kill the - // lodBias in this mode, working around #9772. -#ifndef USING_GLES2 glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_LOD_BIAS, 0.0f); + if (gstate_c.Supports(GPU_SUPPORTS_EXPLICIT_LOD)) { + // we handle it in the shader. + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, 0.0f); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); + } else { + // Abuse min/max to fetch a specific lod. + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, std::max(0.0f, std::min((float)maxLevel, lodBias))); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, std::max(0.0f, std::min((float)maxLevel, lodBias))); + } + } else { // mode == GE_TEXLEVEL_MODE_SLOPE) { + if (gstate_c.Supports(GPU_SUPPORTS_EXPLICIT_LOD)) { + // we handle it in the shader. 
+ glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, 0.0f); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); + } else { + // It's incorrect to use the slope as a bias. Instead it should be passed + // into the shader directly as an explicit lod level, with the bias on top. For now, we just kill the + // lodBias in this mode, working around #9772. +#ifndef USING_GLES2 + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_LOD_BIAS, 0.0f); #endif - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, 0); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, 0); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, (float)maxLevel); + } } entry.lodBias = lodBias; } diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 241ecc3e5..5fc75c5eb 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -130,8 +130,8 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXSIZE6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXSIZE7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE }, - { GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS, &GPUCommon::Execute_TexLevel }, - { GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, + { GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_TEXLOD, &GPUCommon::Execute_TexLevel }, + { GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_TEXLOD }, { GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE | DIRTY_UVSCALEOFFSET }, { GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, { GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS }, @@ -1315,14 +1315,20 @@ void GPUCommon::Execute_End(u32 op, u32 diff) { } void GPUCommon::Execute_TexLevel(u32 op, u32 diff) { - if (diff == 0xFFFFFFFF) return; - + if (diff == 0) + return; gstate.texlevel ^= diff; + // Gran Turismo 
hack. if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & gstate.texlevel) != 0) { Flush(); } gstate.texlevel ^= diff; - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE); + if (diff & 0x00FF0000) { + gstate_c.Dirty(DIRTY_TEXTURE_PARAMS | DIRTY_TEXLOD); + } + if (diff & 0x00000003) { + gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE); + } } void GPUCommon::Execute_TexSize0(u32 op, u32 diff) { diff --git a/GPU/GPUState.h b/GPU/GPUState.h index e5ffc07e1..f75a68ed2 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -147,7 +147,7 @@ struct GPUgstate { clutformat, // 0xC5 texfilter, // 0xC6 texwrap, // 0xC7 - texlevel, // 0xC8 + texlevel, // 0xC8 GE_CMD_TEXLEVEL texfunc, // 0xC9 texenvcolor, // 0xCA texflush, // 0xCB @@ -291,6 +291,7 @@ struct GPUgstate { u16 getTextureDimension(int level) const { return texsize[level] & 0xf0f;} GETexLevelMode getTexLevelMode() const { return static_cast(texlevel & 0x3); } int getTexLevelOffset16() const { return (int)(s8)((texlevel >> 16) & 0xFF); } + float getTextureLodSlope() const { return getFloat24(texlodslope); } bool isTextureMapEnabled() const { return textureMapEnable & 1; } GETexFunc getTextureFunction() const { return static_cast(texfunc & 0x7); } bool isColorDoublingEnabled() const { return (texfunc & 0x10000) != 0; } @@ -320,7 +321,6 @@ struct GPUgstate { bool isMinifyFilteringEnabled() const { return (texfilter & 1) != 0; } bool isMagnifyFilteringEnabled() const { return (texfilter >> 8) & 1; } int getTextureMaxLevel() const { return (texmode >> 16) & 0x7; } - float getTextureLodSlope() const { return getFloat24(texlodslope); } // Lighting bool isLightingEnabled() const { return lightingEnable & 1; } @@ -477,6 +477,8 @@ enum { GPU_SUPPORTS_VERTEX_TEXTURE_FETCH = FLAG_BIT(11), GPU_SUPPORTS_TEXTURE_FLOAT = FLAG_BIT(12), GPU_SUPPORTS_16BIT_FORMATS = FLAG_BIT(13), + GPU_SUPPORTS_EXPLICIT_LOD = FLAG_BIT(14), + // 2 free bits! 
GPU_SUPPORTS_LARGE_VIEWPORTS = FLAG_BIT(16), GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17), GPU_SUPPORTS_VAO = FLAG_BIT(18), diff --git a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp index 10faeb23f..b754e6c85 100644 --- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp @@ -171,9 +171,17 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { } if (doTextureProjection) { - WRITE(p, " vec4 t = textureProj(tex, %s);\n", texcoord); + if (id.Bit(FS_BIT_TEXLOD)) { + WRITE(p, " vec4 t = textureProjLod(tex, %s, base.texLod);\n", texcoord); + } else { + WRITE(p, " vec4 t = textureProj(tex, %s);\n", texcoord); + } } else { - WRITE(p, " vec4 t = texture(tex, %s.xy);\n", texcoord); + if (id.Bit(FS_BIT_TEXLOD)) { + WRITE(p, " vec4 t = textureLod(tex, %s.xy, base.texLod);\n", texcoord); + } else { + WRITE(p, " vec4 t = texture(tex, %s.xy);\n", texcoord); + } } WRITE(p, " vec4 p = v_color0;\n"); diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 1140471a4..6e7d07ce2 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -192,6 +192,7 @@ void GPU_Vulkan::CheckGPUFeatures() { features |= GPU_SUPPORTS_INSTANCE_RENDERING; features |= GPU_SUPPORTS_VERTEX_TEXTURE_FETCH; features |= GPU_SUPPORTS_TEXTURE_FLOAT; + features |= GPU_SUPPORTS_EXPLICIT_LOD; if (vulkan_->GetFeaturesEnabled().wideLines) { features |= GPU_SUPPORTS_WIDE_LINES; diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index bc6e467b7..80f2a62e1 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -20,6 +20,7 @@ #endif #include "base/logging.h" +#include "Common/StringUtils.h" #include "math/lin/matrix4x4.h" #include "math/math_util.h" #include "math/dataconv.h" diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index e0958b4f7..702c06416 100644 --- 
a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -61,7 +61,7 @@ #include "Windows/GPU/WindowsVulkanContext.h" #ifdef _DEBUG -static const bool g_validate_ = true; +static const bool g_validate_ = false; #else static const bool g_validate_ = false; #endif diff --git a/ext/native/math/math_util.h b/ext/native/math/math_util.h index b06fed514..f8ba319a9 100644 --- a/ext/native/math/math_util.h +++ b/ext/native/math/math_util.h @@ -47,6 +47,35 @@ inline uint32_t log2i(uint32_t val) { return ret; } +// Approximates log2(delta) from the float's bit pattern. Produces a signed 1.23.8 fixed-point value. +inline int TexLog2(float delta) { + union FloatBits { + float f; + uint32_t u; + }; + FloatBits f; + f.f = delta; + // Use the exponent as the tex level, and the top mantissa bits for a frac. + // We can't support more than 8 bits of frac, so truncate. + int useful = (f.u >> 15) & 0xFFFF; // Bits 15..30: exponent plus the top 8 mantissa bits; the sign bit is dropped. + // Now offset so the exponent aligns with log2f (exp=127 is 0.) + return useful - 127 * 256; +} + +inline float TexLog2F(float delta) { // Same approximation as TexLog2, but returned as a float (the 1.23.8 value divided by 256). + union FloatBits { + float f; + uint32_t u; + }; + FloatBits f; + f.f = delta; + // Use the exponent as the tex level, and the top mantissa bits for a frac. + // We can't support more than 8 bits of frac, so truncate. + int useful = (f.u >> 15) & 0xFFFF; // Bits 15..30: exponent plus the top 8 mantissa bits; the sign bit is dropped. + // Now offset so the exponent aligns with log2f (exp=127 is 0.) 
+ return (float)(useful - 127 * 256) * (1.0f / 256.0f); +} + #define PI 3.141592653589793f #ifndef M_PI #define M_PI 3.141592653589793f diff --git a/headless/Headless.cpp b/headless/Headless.cpp index e18e32671..4bc2715d3 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -95,7 +95,7 @@ int printUsage(const char *progname, const char *reason) #if defined(HEADLESSHOST_CLASS) { fprintf(stderr, " --graphics=BACKEND use the full gpu backend (slower)\n"); - fprintf(stderr, " options: gles, software, directx9, etc.\n"); + fprintf(stderr, " options: software, directx9, directx11, vulkan, gles, null.\n"); fprintf(stderr, " --screenshot=FILE compare against a screenshot\n"); } #endif diff --git a/test.py b/test.py index bf22ff728..52ed827f8 100755 --- a/test.py +++ b/test.py @@ -27,7 +27,7 @@ PPSSPP_EXECUTABLES = [ PPSSPP_EXE = None TEST_ROOT = "pspautotests/tests/" teamcity_mode = False -TIMEOUT = 5 +TIMEOUT = 500 class Command(object): def __init__(self, cmd, data = None):