GPU/TextureCache: Disable bilinear page upscaling by default

And fix it.
This commit is contained in:
Stenzek 2024-10-04 14:48:10 +10:00
parent 887d588029
commit c740fd9e11
No known key found for this signature in database
7 changed files with 96 additions and 40 deletions

View File

@ -1752,19 +1752,54 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent)
std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter)
{
std::stringstream ss;
WriteHeader(ss);
DefineMacro(ss, "SEMITRANSPARENT", semitransparent);
DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true);
DefineMacro(ss, "BILINEAR_FILTER", bilinear_filter);
DeclareUniformBuffer(ss, {"float4 u_texture_size"}, true);
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1);
ss << R"(
{
float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw;
float4 color = SAMPLE_TEXTURE(samp0, coords);
#if BILINEAR_FILTER
// Compute the coordinates of the four texels we will be interpolating between.
// Clamp this to the triangle texture coordinates.
float2 coords = v_tex0 * u_texture_size.xy;
float2 texel_top_left = frac(coords) - float2(0.5, 0.5);
float2 texel_offset = sign(texel_top_left);
float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
float4(0.0, 0.0, 0.0, 0.0)) * u_texture_size.zwzw;
// Load four texels.
float4 s00 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.xy, 0.0);
float4 s10 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.zy, 0.0);
float4 s01 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.xw, 0.0);
float4 s11 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.zw, 0.0);
// Bilinearly interpolate.
float2 weights = abs(texel_top_left);
float4 color = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
#if !SEMITRANSPARENT
// Compute alpha from how many texels aren't pixel color 0000h.
float a00 = float(VECTOR_NEQ(s00, float4(0.0, 0.0, 0.0, 0.0)));
float a10 = float(VECTOR_NEQ(s10, float4(0.0, 0.0, 0.0, 0.0)));
float a01 = float(VECTOR_NEQ(s01, float4(0.0, 0.0, 0.0, 0.0)));
float a11 = float(VECTOR_NEQ(s11, float4(0.0, 0.0, 0.0, 0.0)));
color.a = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);
// Compensate for partially transparent sampling.
color.rgb /= (color.a != 0.0) ? color.a : 1.0;
// Use binary alpha.
color.a = (color.a >= 0.5) ? 1.0 : 0.0;
#endif
#else
float4 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, 0.0);
#endif
o_col0.rgb = color.rgb;
// Alpha processing.

View File

@ -36,7 +36,7 @@ public:
std::string GenerateAdaptiveDownsampleCompositeFragmentShader();
std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor);
std::string GenerateReplacementMergeFragmentShader(bool semitransparent);
std::string GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter);
private:
ALWAYS_INLINE bool UsingMSAA() const { return m_multisamples > 1; }

View File

@ -514,7 +514,6 @@ static std::vector<std::pair<HashCache::iterator, s32>> s_hash_cache_purge_list;
static std::vector<VRAMWrite*> s_temp_vram_write_list;
static std::unique_ptr<GPUTexture> s_replacement_texture_render_target;
static std::unique_ptr<GPUPipeline> s_replacement_init_pipeline;
static std::unique_ptr<GPUPipeline> s_replacement_draw_pipeline; // copies alpha as-is
static std::unique_ptr<GPUPipeline> s_replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent)
@ -554,6 +553,7 @@ bool GPUTextureCache::IsDumpingVRAMWriteTextures()
bool GPUTextureCache::Initialize()
{
LoadLocalConfiguration(false, false);
UpdateVRAMTrackingState();
if (!CompilePipelines())
return false;
@ -571,13 +571,22 @@ void GPUTextureCache::UpdateSettings(const Settings& old_settings)
Invalidate();
DestroyPipelines();
if (!CompilePipelines())
if (!CompilePipelines()) [[unlikely]]
Panic("Failed to compile pipelines on TC settings change");
}
// Reload textures if configuration changes.
const bool old_replacement_scale_linear_filter = s_config.replacement_scale_linear_filter;
if (LoadLocalConfiguration(false, false))
{
if (s_config.replacement_scale_linear_filter != old_replacement_scale_linear_filter)
{
if (!CompilePipelines()) [[unlikely]]
Panic("Failed to compile pipelines on TC replacement settings change");
}
ReloadTextureReplacements(false);
}
}
bool GPUTextureCache::DoState(StateWrapper& sw, bool skip)
@ -755,24 +764,18 @@ bool GPUTextureCache::CompilePipelines()
plconfig.vertex_shader = fullscreen_quad_vertex_shader.get();
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateCopyFragmentShader());
if (!fs)
return false;
plconfig.fragment_shader = fs.get();
if (!(s_replacement_init_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateReplacementMergeFragmentShader(false));
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateReplacementMergeFragmentShader(false, s_config.replacement_scale_linear_filter));
if (!fs)
return false;
plconfig.fragment_shader = fs.get();
if (!(s_replacement_draw_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateReplacementMergeFragmentShader(true));
fs = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateReplacementMergeFragmentShader(true, s_config.replacement_scale_linear_filter));
if (!fs)
return false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
@ -785,7 +788,6 @@ bool GPUTextureCache::CompilePipelines()
// Drops the texture-replacement pipelines by resetting each owning std::unique_ptr listed
// below, leaving the pointers null. UpdateSettings() calls this before CompilePipelines().
void GPUTextureCache::DestroyPipelines()
{
s_replacement_init_pipeline.reset();
s_replacement_draw_pipeline.reset();
s_replacement_semitransparent_draw_pipeline.reset();
}
@ -3232,9 +3234,6 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash,
max_scale_y = std::min(max_scale_y, max_possible_scale);
const GSVector4 max_scale_v = GSVector4(max_scale_x, max_scale_y).xyxy();
GPUSampler* filter =
s_config.replacement_scale_linear_filter ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler();
const u32 new_width = static_cast<u32>(std::ceil(static_cast<float>(TEXTURE_PAGE_WIDTH) * max_scale_x));
const u32 new_height = static_cast<u32>(std::ceil(static_cast<float>(TEXTURE_PAGE_HEIGHT) * max_scale_y));
if (!s_replacement_texture_render_target || s_replacement_texture_render_target->GetWidth() < new_width ||
@ -3259,16 +3258,17 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash,
return;
}
// TODO: This is AWFUL. Need a better way.
// Linear filtering is also wrong, it should do hard edges for 0000 pixels.
// We could just copy this from the original image...
static constexpr const float u_src_rect[4] = {0.0f, 0.0f, 1.0f, 1.0f};
// TODO: Use rects instead of fullscreen tris, maybe avoid the copy..
alignas(VECTOR_ALIGNMENT) float uniforms[4];
GSVector2 texture_size = GSVector2(GSVector2i(entry->texture->GetWidth(), entry->texture->GetHeight()));
GSVector2::store(&uniforms[0], texture_size);
GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size);
g_gpu_device->InvalidateRenderTarget(s_replacement_texture_render_target.get());
g_gpu_device->SetRenderTarget(s_replacement_texture_render_target.get());
g_gpu_device->SetViewportAndScissor(0, 0, new_width, new_height);
g_gpu_device->SetPipeline(s_replacement_init_pipeline.get());
g_gpu_device->PushUniformBuffer(u_src_rect, sizeof(u_src_rect));
g_gpu_device->SetTextureSampler(0, entry->texture.get(), filter);
g_gpu_device->SetPipeline(s_replacement_draw_pipeline.get());
g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
g_gpu_device->SetTextureSampler(0, entry->texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->Draw(3, 0);
for (const TextureReplacementSubImage& si : subimages)
@ -3280,8 +3280,11 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash,
continue;
const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v);
texture_size = GSVector2(GSVector2i(temp_texture->GetWidth(), temp_texture->GetHeight()));
GSVector2::store(&uniforms[0], texture_size);
GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size);
g_gpu_device->SetViewportAndScissor(dst_rect);
g_gpu_device->SetTextureSampler(0, temp_texture.get(), filter);
g_gpu_device->SetTextureSampler(0, temp_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetPipeline(si.invert_alpha ? s_replacement_semitransparent_draw_pipeline.get() :
s_replacement_draw_pipeline.get());
g_gpu_device->Draw(3, 0);

View File

@ -458,7 +458,7 @@ void Settings::Load(SettingsInterface& si, SettingsInterface& controller_si)
texture_replacements.config.convert_copies_to_writes =
si.GetBoolValue("TextureReplacements", "ConvertCopiesToWrites", false);
texture_replacements.config.replacement_scale_linear_filter =
si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", true);
si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", false);
texture_replacements.config.max_vram_write_splits = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u);
texture_replacements.config.max_vram_write_coalesce_width =

View File

@ -252,7 +252,7 @@ struct Settings
bool dump_c16_textures : 1 = false;
bool reduce_palette_range : 1 = true;
bool convert_copies_to_writes : 1 = false;
bool replacement_scale_linear_filter = true;
bool replacement_scale_linear_filter = false;
u32 max_vram_write_splits = 0;
u32 max_vram_write_coalesce_width = 0;

View File

@ -1183,6 +1183,9 @@ void GraphicsSettingsWidget::onTextureReplacementOptionsClicked()
SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.convertCopiesToWrites, "TextureReplacements",
"ConvertCopiesToWrites",
default_replacement_config.convert_copies_to_writes);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.replacementScaleLinearFilter, "TextureReplacements",
"ReplacementScaleLinearFilter",
default_replacement_config.replacement_scale_linear_filter);
SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteSplits, "TextureReplacements",
"MaxVRAMWriteSplits", default_replacement_config.max_vram_write_splits);
SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteCoalesceWidth, "TextureReplacements",
@ -1225,6 +1228,7 @@ void GraphicsSettingsWidget::onTextureReplacementOptionsClicked()
config.dump_c16_textures = dlgui.dumpC16Textures->isChecked();
config.reduce_palette_range = dlgui.reducePaletteRange->isChecked();
config.convert_copies_to_writes = dlgui.convertCopiesToWrites->isChecked();
config.replacement_scale_linear_filter = dlgui.replacementScaleLinearFilter->isChecked();
config.max_vram_write_splits = dlgui.maxVRAMWriteSplits->value();
config.max_vram_write_coalesce_width = dlgui.maxVRAMWriteCoalesceWidth->value();
config.max_vram_write_coalesce_height = dlgui.maxVRAMWriteCoalesceHeight->value();

View File

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>646</width>
<height>587</height>
<width>587</width>
<height>597</height>
</rect>
</property>
<property name="windowTitle">
@ -52,6 +52,9 @@
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="openExternalLinks">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
@ -177,11 +180,22 @@
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="convertCopiesToWrites">
<property name="text">
<string>Convert Copies To Writes</string>
</property>
</widget>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="0">
<widget class="QCheckBox" name="convertCopiesToWrites">
<property name="text">
<string>Convert Copies To Writes</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="replacementScaleLinearFilter">
<property name="text">
<string>Bilinear Replacement Scaling</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>