diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fe8ecb10..8c81c7550 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -916,6 +916,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/profile.h src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp src/shader_recompiler/ir/passes/hull_shader_transform.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp + src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index f20310a91..e243f63db 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -204,6 +204,7 @@ add_subdirectory(tracy) # pugixml if (NOT TARGET pugixml::pugixml) + option(PUGIXML_NO_EXCEPTIONS "" ON) add_subdirectory(pugixml) endif() diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index cc6d19075..4600d30af 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -364,7 +364,7 @@ void EmitContext::DefineInputs() { } break; } - case LogicalStage::Fragment: + case LogicalStage::Fragment: { if (info.loads.GetAny(IR::Attribute::FragCoord)) { frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); } @@ -418,7 +418,13 @@ void EmitContext::DefineInputs() { spv::StorageClass::Input); } } - for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { + + const bool has_clip_distance_inputs = runtime_info.fs_info.clip_distance_emulation; + // Clip distances attribute vector is the last in inputs array + const auto num_inputs = + runtime_info.fs_info.num_inputs - (has_clip_distance_inputs ? 1 : 0); + + for (s32 i = 0; i < num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; if (input.IsDefault()) { continue; @@ -428,12 +434,13 @@ void EmitContext::DefineInputs() { const auto [primary, auxiliary] = info.fs_interpolation[i]; const Id type = F32[num_components]; const Id attr_id = [&] { + const auto bind_location = input.param_index + (has_clip_distance_inputs ? 1 : 0); if (primary == Qualifier::PerVertex && profile.supports_fragment_shader_barycentric) { - return Name(DefineInput(TypeArray(type, ConstU32(3U)), input.param_index), + return Name(DefineInput(TypeArray(type, ConstU32(3U)), bind_location), fmt::format("fs_in_attr{}_p", i)); } - return Name(DefineInput(type, input.param_index), fmt::format("fs_in_attr{}", i)); + return Name(DefineInput(type, bind_location), fmt::format("fs_in_attr{}", i)); }(); if (primary == Qualifier::PerVertex) { Decorate(attr_id, profile.supports_amd_shader_explicit_vertex_parameter @@ -450,7 +457,15 @@ void EmitContext::DefineInputs() { input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false, false, primary == Qualifier::PerVertex); } + + if (has_clip_distance_inputs) { + const auto type = F32[MaxEmulatedClipDistances]; + const auto attr_id = Name(DefineInput(type, 0), fmt::format("cldist_attr{}", 0)); + input_params[num_inputs] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, + MaxEmulatedClipDistances, false); + } break; + } case LogicalStage::Compute: if (info.loads.GetAny(IR::Attribute::WorkgroupIndex) || info.loads.GetAny(IR::Attribute::WorkgroupId)) { @@ -546,11 +561,16 @@ void EmitContext::DefineVertexBlock() { const std::array zero{f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value}; output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); - if (info.stores.GetAny(IR::Attribute::ClipDistance)) { - const Id type{TypeArray(F32[1], ConstU32(8U))}; - const Id initializer{ConstantComposite(type, zero)}; - clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output, - initializer); + const bool needs_clip_distance_emulation = l_stage == LogicalStage::Vertex && + stage == Stage::Vertex && + profile.needs_clip_distance_emulation; + if (!needs_clip_distance_emulation) { + if (info.stores.GetAny(IR::Attribute::ClipDistance)) { + const Id type{TypeArray(F32[1], ConstU32(8U))}; + const Id initializer{ConstantComposite(type, zero)}; + clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, + spv::StorageClass::Output, initializer); + } } if (info.stores.GetAny(IR::Attribute::CullDistance)) { const Id type{TypeArray(F32[1], ConstU32(8U))}; @@ -583,16 +603,27 @@ void EmitContext::DefineOutputs() { Name(output_attr_array, "out_attrs"); } } else { + const auto has_clip_distance_outputs = info.stores.GetAny(IR::Attribute::ClipDistance); + u32 num_attrs = 0u; for (u32 i = 0; i < IR::NumParams; i++) { const IR::Attribute param{IR::Attribute::Param0 + i}; if (!info.stores.GetAny(param)) { continue; } const u32 num_components = info.stores.NumComponents(param); - const Id id{DefineOutput(F32[num_components], i)}; + const Id id{ + DefineOutput(F32[num_components], i + (has_clip_distance_outputs ? 1 : 0))}; Name(id, fmt::format("out_attr{}", i)); output_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true); + ++num_attrs; + } + + if (has_clip_distance_outputs) { + clip_distances = Id{DefineOutput(F32[MaxEmulatedClipDistances], 0)}; + output_params[num_attrs] = GetAttributeInfo( + AmdGpu::NumberFormat::Float, clip_distances, MaxEmulatedClipDistances, true); + Name(clip_distances, fmt::format("cldist_attr{}", 0)); } } break; diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 84a9fafeb..e74b62817 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -101,7 +101,7 @@ std::string NameOf(Attribute attribute) { case Attribute::Param31: return "Param31"; case Attribute::ClipDistance: - return "ClipDistanace"; + return "ClipDistance"; case Attribute::CullDistance: return "CullDistance"; case Attribute::RenderTargetIndex: diff --git a/src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp b/src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp new file mode 100644 index 000000000..cf93142a1 --- /dev/null +++ b/src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" + +namespace Shader { + +void InjectClipDistanceAttributes(IR::Program& program, RuntimeInfo& runtime_info) { + auto& info = runtime_info.fs_info; + + if (!info.clip_distance_emulation || program.info.l_stage != LogicalStage::Fragment) { + return; + } + + auto* first_block = *program.blocks.begin(); + auto it = std::ranges::find_if(first_block->Instructions(), [](const IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Prologue; + }); + ASSERT(it != first_block->end()); + ++it; + ASSERT(it != first_block->end()); + ++it; + + IR::IREmitter ir{*first_block, it}; + + // We don't know how many clip distances are exported by VS as it is not processed at this point + // yet. Here is an assumption that we will have not more than 4 of them (while max is 8) to save + // one attributes export slot. + const auto attrib = IR::Attribute::Param0 + info.num_inputs; + for (u32 comp = 0; comp < MaxEmulatedClipDistances; ++comp) { + const auto attr_read = ir.GetAttribute(attrib, comp); + const auto cond_id = ir.FPLessThan(attr_read, ir.Imm32(0.0f)); + ir.Discard(cond_id); + } + ++info.num_inputs; +} + +} // namespace Shader diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 5bf362284..f103b6736 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -8,7 +8,8 @@ namespace Shader { struct Profile; -} +void InjectClipDistanceAttributes(IR::Program& program, RuntimeInfo& runtime_info); +} // namespace Shader namespace Shader::Optimization { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 52e37bbf0..038a80733 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -41,7 +41,7 @@ struct Profile { bool needs_lds_barriers{}; bool needs_buffer_offsets{}; bool needs_unorm_fixup{}; - bool _pad0{}; + bool needs_clip_distance_emulation{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 4764ddbec..f4fa45afc 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -13,17 +13,16 @@ namespace Shader { IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { size_t num_syntax_blocks{}; - for (const auto& node : syntax_list) { - if (node.type == IR::AbstractSyntaxNode::Type::Block) { + for (const auto& [_, type] : syntax_list) { + if (type == IR::AbstractSyntaxNode::Type::Block) { ++num_syntax_blocks; } } - IR::BlockList blocks; + IR::BlockList blocks{}; blocks.reserve(num_syntax_blocks); - u32 order_index{}; - for (const auto& node : syntax_list) { - if (node.type == IR::AbstractSyntaxNode::Type::Block) { - blocks.push_back(node.data.block); + for (const auto& [data, type] : syntax_list) { + if (type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(data.block); } } return blocks; @@ -60,6 +59,10 @@ IR::Program TranslateProgram(const std::span& code, Pools& pools, Inf program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); + // On NVIDIA GPUs HW interpolation of clip distance values seems broken, and we need to emulate + // it with expensive discard in PS. + Shader::InjectClipDistanceAttributes(program, runtime_info); + // Run optimization passes if (!profile.support_float64) { Shader::Optimization::LowerFp64ToFp32(program); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 8620ab970..04e176765 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -34,6 +34,7 @@ enum class LogicalStage : u32 { }; constexpr u32 MaxStageTypes = static_cast(LogicalStage::NumLogicalStages); +constexpr auto MaxEmulatedClipDistances = 4u; constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); @@ -201,14 +202,16 @@ struct FragmentRuntimeInfo { std::array inputs; std::array color_buffers; AmdGpu::ShaderExportFormat z_export_format; - u8 mrtz_mask; - bool dual_source_blending; + u8 mrtz_mask{}; + bool dual_source_blending{false}; + bool clip_distance_emulation{false}; bool operator==(const FragmentRuntimeInfo& other) const noexcept { return std::ranges::equal(color_buffers, other.color_buffers) && en_flags == other.en_flags && addr_flags == other.addr_flags && num_inputs == other.num_inputs && z_export_format == other.z_export_format && mrtz_mask == other.mrtz_mask && dual_source_blending == other.dual_source_blending && + clip_distance_emulation == other.clip_distance_emulation && std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), other.inputs.begin() + num_inputs); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a0ea58817..1b0af1d17 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -101,7 +101,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS switch (stage) { case Stage::Local: { BuildCommon(regs.ls_program); - Shader::TessellationDataConstantBuffer tess_constants; + Shader::TessellationDataConstantBuffer tess_constants{}; const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)]; hull_info->ReadTessConstantBuffer(tess_constants); info.ls_info.ls_stride = tess_constants.ls_stride; @@ -199,6 +199,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { info.fs_info.color_buffers[i] = graphics_key.color_buffers[i]; } + info.fs_info.clip_distance_emulation = + regs.vs_output_control.clip_distance_enable && + !regs.stage_enable.IsStageEnabled(static_cast(Stage::Local)) && + profile.needs_clip_distance_emulation; break; } case Stage::Compute: { @@ -266,6 +270,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, instance.GetDriverID() == vk::DriverId::eMoltenvk, .needs_buffer_offsets = instance.StorageMinAlignment() > 4, .needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk, + .needs_clip_distance_emulation = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; WarmUp(); @@ -460,7 +465,13 @@ bool PipelineCache::RefreshGraphicsStages() { infos.fill(nullptr); modules.fill(nullptr); - bind_stage(Stage::Fragment, LogicalStage::Fragment); + const auto result = bind_stage(Stage::Fragment, LogicalStage::Fragment); + if (!result && regs.vs_output_control.clip_distance_enable && + profile.needs_clip_distance_emulation) { + // TODO: need to implement a discard only fallback shader + LOG_WARNING(Render_Vulkan, + "Clip distance emulation is ineffective due to absense of fragment shader"); + } const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;