From 5f4ddc14fcbaf02b141c31368a3f59caa310af58 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 21 Sep 2024 21:45:56 +0200 Subject: [PATCH] Image subresources barriers (#904) * video_core: texture: image subresources state tracking * shader_recompiler: use one binding if the same image is read and written * video_core: added rebinding of changed textures after overlap resolve * don't use pointers; slight `FindTexture` refactoring * video_core: buffer_cache: don't copy over the image size * redundant barriers removed; fixes * regression fixes * texture_cache: 3d texture layers count fixup * shader_recompiler: support for partially bound cubemaps * added support for cubemap arrays * don't bind unused color buffers * fixed depth promotion to do not use stencil * doors * bonfire lit * cubemap array index calculation * final touches --- CMakeLists.txt | 2 + .../backend/spirv/emit_spirv_image.cpp | 7 +- .../backend/spirv/spirv_emit_context.cpp | 4 +- .../backend/spirv/spirv_emit_context.h | 5 +- src/shader_recompiler/frontend/decode.cpp | 1 + .../frontend/translate/export.cpp | 3 + .../frontend/translate/vector_memory.cpp | 2 + src/shader_recompiler/info.h | 6 +- .../ir/passes/resource_tracking_pass.cpp | 27 ++- src/shader_recompiler/ir/reg.h | 1 + src/shader_recompiler/specialization.h | 3 +- src/video_core/amdgpu/resource.h | 14 +- src/video_core/buffer_cache/buffer_cache.cpp | 12 +- .../renderer_vulkan/liverpool_to_vk.h | 10 ++ .../renderer_vulkan/renderer_vulkan.cpp | 10 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 35 +--- .../renderer_vulkan/vk_compute_pipeline.h | 21 +-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 57 ++---- .../renderer_vulkan/vk_graphics_pipeline.h | 32 ++-- .../renderer_vulkan/vk_instance.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 39 +++-- .../renderer_vulkan/vk_pipeline_common.cpp | 80 +++++++++ .../renderer_vulkan/vk_pipeline_common.h | 48 ++++++ .../renderer_vulkan/vk_platform.cpp | 1 - .../renderer_vulkan/vk_rasterizer.cpp | 15 +- .../renderer_vulkan/vk_rasterizer.h | 2 +- .../renderer_vulkan/vk_scheduler.cpp | 52 ------ src/video_core/texture_cache/image.cpp | 163 +++++++++++++----- src/video_core/texture_cache/image.h | 21 ++- src/video_core/texture_cache/image_info.cpp | 14 +- src/video_core/texture_cache/image_info.h | 3 +- src/video_core/texture_cache/image_view.cpp | 40 ++++- src/video_core/texture_cache/image_view.h | 3 +- .../texture_cache/texture_cache.cpp | 39 +++-- src/video_core/texture_cache/texture_cache.h | 5 +- 35 files changed, 495 insertions(+), 283 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_common.cpp create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_common.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b334f594..1980b368 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -579,6 +579,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_master_semaphore.h src/video_core/renderer_vulkan/vk_pipeline_cache.cpp src/video_core/renderer_vulkan/vk_pipeline_cache.h + src/video_core/renderer_vulkan/vk_pipeline_common.cpp + src/video_core/renderer_vulkan/vk_pipeline_common.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h src/video_core/renderer_vulkan/vk_rasterizer.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 530f381d..1944db07 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -157,8 +157,11 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const ImageOperands operands; operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); - return ctx.OpBitcast( - ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); + const Id texel = + texture.is_storage + ? ctx.OpImageRead(result_type, image, coords, operands.mask, operands.operands) + : ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands); + return ctx.OpBitcast(ctx.F32[4], texel); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 2f1f7aa7..164c30c5 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -510,7 +510,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { case AmdGpu::ImageType::Color3D: return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); case AmdGpu::ImageType::Cube: - return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, false, false, sampled, format); + return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled, + format); default: break; } @@ -534,6 +535,7 @@ void EmitContext::DefineImagesAndSamplers() { .sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type), .pointer_type = pointer_type, .image_type = image_type, + .is_storage = image_desc.is_storage, }); interfaces.push_back(id); ++binding; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0908b7f8..1a968390 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -200,6 +200,7 @@ public: Id sampled_type; Id pointer_type; Id image_type; + bool is_storage = false; }; struct BufferDefinition { @@ -216,8 +217,8 @@ public: u32 binding; Id image_type; Id result_type; - bool is_integer; - bool is_storage; + bool is_integer = false; + bool is_storage = false; }; u32& binding; diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index 26a2c1a6..6020f93b 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -1032,6 +1032,7 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) { m_instruction.control.mimg = *reinterpret_cast(&hexInstruction); m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode); + ASSERT(m_instruction.control.mimg.r128 == 0); } void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) { diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 18e830f7..7d901822 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -71,6 +71,9 @@ void Translator::EmitExport(const GcnInst& inst) { ir.SetAttribute(attrib, comp, swizzle(i)); } } + if (IR::IsMrt(attrib)) { + info.mrt_mask |= 1u << u8(attrib); + } } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 212d7fdc..7ecc2e76 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -546,6 +546,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) { info.has_offset.Assign(flags.test(MimgModifier::Offset)); info.explicit_lod.Assign(explicit_lod); info.has_derivatives.Assign(has_derivatives); + info.is_array.Assign(mimg.da); // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { @@ -630,6 +631,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { info.has_offset.Assign(flags.test(MimgModifier::Offset)); // info.explicit_lod.Assign(explicit_lod); info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); + info.is_array.Assign(mimg.da); // Issue IR instruction, leaving unknown fields blank to patch later. const IR::Value texel = [&]() -> IR::Value { diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d8282bf4..ac623253 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -64,9 +64,10 @@ struct ImageResource { u32 dword_offset; AmdGpu::ImageType type; AmdGpu::NumberFormat nfmt; - bool is_storage; - bool is_depth; + bool is_storage{}; + bool is_depth{}; bool is_atomic{}; + bool is_array{}; constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; @@ -171,6 +172,7 @@ struct Info { bool uses_fp64{}; bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions + u8 mrt_mask{0u}; explicit Info(Stage stage_, ShaderParams params) : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index a56c9f49..db0d75f0 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -200,9 +200,10 @@ public: u32 Add(const ImageResource& desc) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { return desc.sgpr_base == existing.sgpr_base && - desc.dword_offset == existing.dword_offset && desc.type == existing.type && - desc.is_storage == existing.is_storage; + desc.dword_offset == existing.dword_offset; })}; + auto& image = image_resources[index]; + image.is_storage |= desc.is_storage; return index; } @@ -441,18 +442,29 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, - const IR::Value& z, bool is_storage) { + const IR::Value& z, bool is_storage, bool is_array) { // When cubemap is written with imageStore it is treated like 2DArray. if (is_storage) { return ir.CompositeConstruct(s, t, z); } + + ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below + // We need to fix x and y coordinate, // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32. // We already force the scale value to be 1.0 when handling v_cubema_f32, // here we subtract 1.5 to recover the original value. const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f)); const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f)); - return ir.CompositeConstruct(x, y, z); + if (is_array) { + const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z}); + const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u)); + const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u)); + return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id), + ir.ConvertIToF(32, 32, false, slice_id)); + } else { + return ir.CompositeConstruct(x, y, z); + } } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { @@ -481,14 +493,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_storage = IsImageStorageInstruction(inst); + const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType(); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, .dword_offset = tsharp.dword_offset, - .type = image.GetType(), + .type = type, .nfmt = static_cast(image.GetNumberFmt()), .is_storage = is_storage, .is_depth = bool(inst_info.is_depth), .is_atomic = IsImageAtomicInstruction(inst), + .is_array = bool(inst_info.is_array), }); // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions @@ -545,7 +559,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip case AmdGpu::ImageType::Color3D: // x, y, z return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; case AmdGpu::ImageType::Cube: // x, y, face - return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage), + return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage, + inst_info.is_array), body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", image.GetType()); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index fba04f33..4783d08e 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -59,6 +59,7 @@ union TextureInstInfo { BitField<5, 1, u32> has_offset; BitField<6, 2, u32> gather_comp; BitField<8, 1, u32> has_derivatives; + BitField<9, 1, u32> is_array; }; union BufferInstInfo { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index bbcafdb8..e95559d0 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -62,7 +62,8 @@ struct StageSpecialization { }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.GetType(); + spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray + : sharp.GetType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); }); } diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 41dbe801..fc572a04 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -238,10 +238,15 @@ struct Image { return pitch + 1; } - u32 NumLayers() const { + u32 NumLayers(bool is_array) const { u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1; if (GetType() == ImageType::Cube) { - slices *= 6; + if (is_array) { + slices = last_array + 1; + ASSERT(slices % 6 == 0); + } else { + slices = 6; + } } if (pow2pad) { slices = std::bit_ceil(slices); @@ -282,6 +287,11 @@ struct Image { bool IsTiled() const { return GetTilingMode() != TilingMode::Display_Linear; } + + bool IsPartialCubemap() const { + const auto viewed_slice = last_array - base_array + 1; + return GetType() == ImageType::Cube && viewed_slice < 6; + } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 4530f690..ac168f18 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -581,15 +581,23 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, return false; } Image& image = texture_cache.GetImage(image_id); + ASSERT_MSG(device_addr == image.info.guest_address, + "Texel buffer aliases image subresources {:x} : {:x}", device_addr, + image.info.guest_address); boost::container::small_vector copies; u32 offset = buffer.Offset(image.cpu_addr); const u32 num_layers = image.info.resources.layers; + u32 total_size = 0; for (u32 m = 0; m < image.info.resources.levels; m++) { const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + offset += mip_ofs * num_layers; + if (offset + (mip_size * num_layers) > buffer.SizeBytes()) { + break; + } copies.push_back({ .bufferOffset = offset, .bufferRowLength = static_cast(mip_pitch), @@ -603,11 +611,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, .imageOffset = {0, 0, 0}, .imageExtent = {width, height, depth}, }); - offset += mip_ofs * num_layers; + total_size += mip_size * num_layers; } if (!copies.empty()) { scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, copies); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 8432d214..f5d10d48 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include +#include "common/assert.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -55,4 +56,13 @@ vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags support void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); +static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { + if (fmt == vk::Format::eR32Sfloat) { + return vk::Format::eD32Sfloat; + } else if (fmt == vk::Format::eR16Unorm) { + return vk::Format::eD16Unorm; + } + UNREACHABLE(); +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d019ff03..d7954bf7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -202,7 +202,8 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}, + cmdbuf); const std::array pre_barrier{ vk::ImageMemoryBarrier{ @@ -228,7 +229,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. cmdbuf.blitImage( - image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, + image.image, image.last_state.layout, frame->image, vk::ImageLayout::eTransferDstOptimal, MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height), vk::Filter::eLinear); @@ -269,6 +270,9 @@ void RendererVulkan::Present(Frame* frame) { auto& scheduler = present_scheduler; const auto cmdbuf = scheduler.CommandBuffer(); + + ImGui::Core::Render(cmdbuf, frame); + { auto* profiler_ctx = instance.GetProfilerContext(); TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame", @@ -326,8 +330,6 @@ void RendererVulkan::Present(Frame* frame) { }, }; - ImGui::Core::Render(cmdbuf, frame); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index d9296b50..e10b7048 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "common/alignment.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" @@ -15,7 +16,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, u64 compute_key_, const Shader::Info& info_, vk::ShaderModule module) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, compute_key{compute_key_}, + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, compute_key{compute_key_}, info{&info_} { const vk::PipelineShaderStageCreateInfo shader_ci = { .stage = vk::ShaderStageFlagBits::eCompute, @@ -108,12 +109,13 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, // Bind resource buffers and textures. boost::container::static_vector buffer_views; boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; boost::container::small_vector set_writes; boost::container::small_vector buffer_barriers; Shader::PushData push_data{}; u32 binding{}; + image_infos.clear(); + for (const auto& desc : info->buffers) { bool is_storage = true; if (desc.is_gds_buffer) { @@ -213,35 +215,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, }); } - for (const auto& image_desc : info->images) { - const auto tsharp = image_desc.GetSharp(*info); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); - } else if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = image_desc.is_storage ? vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); + BindTextures(texture_cache, *info, binding, set_writes); - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)"); - } - } for (const auto& sampler : info->samplers) { const auto ssharp = sampler.GetSharp(*info); if (ssharp.force_degamma) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8a6213a2..f1bc7285 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -3,9 +3,8 @@ #pragma once -#include -#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { class BufferCache; @@ -18,27 +17,17 @@ class Instance; class Scheduler; class DescriptorHeap; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(const Instance& instance, Scheduler& scheduler, - DescriptorHeap& desc_heap, vk::PipelineCache pipeline_cache, - u64 compute_key, const Shader::Info& info, vk::ShaderModule module); + ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + vk::PipelineCache pipeline_cache, u64 compute_key, const Shader::Info& info, + vk::ShaderModule module); ~ComputePipeline(); - [[nodiscard]] vk::Pipeline Handle() const noexcept { - return *pipeline; - } - bool BindResources(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; private: - const Instance& instance; - Scheduler& scheduler; - DescriptorHeap& desc_heap; - vk::UniquePipeline pipeline; - vk::UniquePipelineLayout pipeline_layout; - vk::UniqueDescriptorSetLayout desc_layout; u64 compute_key; const Shader::Info* info; bool uses_push_descriptors{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dc311a7c..5fa995b4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -21,7 +21,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul vk::PipelineCache pipeline_cache, std::span infos, std::span modules) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, key{key_} { + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); BuildDescSetLayout(); @@ -41,8 +41,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul }; pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); - boost::container::static_vector bindings; - boost::container::static_vector attributes; + boost::container::static_vector vertex_bindings; + boost::container::static_vector vertex_attributes; const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; for (const auto& input : vs_info->vs_inputs) { if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || @@ -52,13 +52,13 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } const auto buffer = vs_info->ReadUd(input.sgpr_base, input.dword_offset); - attributes.push_back({ + vertex_attributes.push_back({ .location = input.binding, .binding = input.binding, .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); - bindings.push_back({ + vertex_bindings.push_back({ .binding = input.binding, .stride = buffer.GetStride(), .inputRate = input.instance_step_rate == Shader::Info::VsInput::None @@ -68,10 +68,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = static_cast(bindings.size()), - .pVertexBindingDescriptions = bindings.data(), - .vertexAttributeDescriptionCount = static_cast(attributes.size()), - .pVertexAttributeDescriptions = attributes.data(), + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), }; if (key.prim_type == Liverpool::PrimitiveType::RectList && !IsEmbeddedVs()) { @@ -291,8 +291,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul GraphicsPipeline::~GraphicsPipeline() = default; void GraphicsPipeline::BuildDescSetLayout() { - u32 binding{}; boost::container::small_vector bindings; + u32 binding{}; + for (const auto* stage : stages) { if (!stage) { continue; @@ -352,12 +353,13 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, // Bind resource buffers and textures. boost::container::static_vector buffer_views; boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; boost::container::small_vector set_writes; boost::container::small_vector buffer_barriers; Shader::PushData push_data{}; u32 binding{}; + image_infos.clear(); + for (const auto* stage : stages) { if (!stage) { continue; @@ -444,44 +446,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, }); } - boost::container::static_vector tsharps; - for (const auto& image_desc : stage->images) { - const auto tsharp = image_desc.GetSharp(*stage); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - tsharps.emplace_back(tsharp); - VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; - VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; - const auto& image_view = texture_cache.FindTexture(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout); - } else if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = image_desc.is_storage ? vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); + BindTextures(texture_cache, *stage, binding, set_writes); - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); - } - } for (const auto& sampler : stage->samplers) { auto ssharp = sampler.GetSharp(*stage); if (ssharp.force_degamma) { LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); } if (sampler.disable_aniso) { - const auto& tsharp = tsharps[sampler.associated_image]; + const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage); if (tsharp.base_level == 0 && tsharp.last_level == 0) { ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index c8a08b4f..74817656 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,7 +5,7 @@ #include "common/types.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" -#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" namespace VideoCore { class BufferCache; @@ -33,6 +33,7 @@ struct GraphicsPipelineKey { Liverpool::DepthControl depth_stencil; u32 depth_bias_enable; u32 num_samples; + u32 mrt_mask; Liverpool::StencilControl stencil; Liverpool::PrimitiveType prim_type; u32 enable_primitive_restart; @@ -50,26 +51,17 @@ struct GraphicsPipelineKey { } }; -class GraphicsPipeline { +class GraphicsPipeline : public Pipeline { public: - explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, - DescriptorHeap& desc_heap, const GraphicsPipelineKey& key, - vk::PipelineCache pipeline_cache, - std::span stages, - std::span modules); + GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, + std::span stages, + std::span modules); ~GraphicsPipeline(); void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache) const; - vk::Pipeline Handle() const noexcept { - return *pipeline; - } - - vk::PipelineLayout GetLayout() const { - return *pipeline_layout; - } - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { return *stages[u32(stage)]; } @@ -83,6 +75,10 @@ public: return key.write_masks; } + auto GetMrtMask() const { + return key.mrt_mask; + } + bool IsDepthEnabled() const { return key.depth_stencil.depth_enable.Value(); } @@ -91,12 +87,6 @@ private: void BuildDescSetLayout(); private: - const Instance& instance; - Scheduler& scheduler; - DescriptorHeap& desc_heap; - vk::UniquePipeline pipeline; - vk::UniquePipelineLayout pipeline_layout; - vk::UniqueDescriptorSetLayout desc_layout; std::array stages{}; GraphicsPipelineKey key; bool uses_push_descriptors{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0bc73e14..8bec96cf 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -282,6 +282,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceFeatures2{ .features{ .robustBufferAccess = features.robustBufferAccess, + .imageCubeArray = features.imageCubeArray, .independentBlend = features.independentBlend, .geometryShader = features.geometryShader, .logicOp = features.logicOp, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7f6079a5..4fc1f46e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -234,18 +234,20 @@ bool PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; key.num_samples = regs.aa_config.NumSamples(); - const auto skip_cb_binding = + const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color - // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. - // We need to do some arrays compaction at this stage + // attachments. This might be not a case as HW color buffers can be bound in an arbitrary + // order. We need to do some arrays compaction at this stage key.color_formats.fill(vk::Format::eUndefined); key.blend_controls.fill({}); key.write_masks.fill({}); key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); - int remapped_cb{}; - for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + + // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader + // recompiler. + for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { continue; @@ -258,11 +260,6 @@ bool PipelineCache::RefreshGraphicsKey() { if (base_format == key.color_formats[remapped_cb]) { key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); } - key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && - !col_buf.info.blend_bypass); - key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; - key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); ++remapped_cb; } @@ -309,6 +306,28 @@ bool PipelineCache::RefreshGraphicsKey() { std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } + + const auto* fs_info = infos[u32(Shader::Stage::Fragment)]; + key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; + + // Second pass to fill remain CB pipeline key data + for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { + auto const& col_buf = regs.color_buffers[cb]; + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb) || + (key.mrt_mask & (1u << cb)) == 0) { + key.color_formats[cb] = vk::Format::eUndefined; + key.mrt_swizzles[cb] = Liverpool::ColorBuffer::SwapMode::Standard; + continue; + } + + key.blend_controls[remapped_cb] = regs.blend_control[cb]; + key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && + !col_buf.info.blend_bypass); + key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; + key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + + ++remapped_cb; + } return true; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp new file mode 100644 index 00000000..77029602 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_common.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace Vulkan { + +boost::container::static_vector Pipeline::image_infos; + +Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, + vk::PipelineCache pipeline_cache) + : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {} + +Pipeline::~Pipeline() = default; + +void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + u32& binding, DescriptorWrites& set_writes) const { + + using ImageBindingInfo = std::tuple; + boost::container::static_vector image_bindings; + + for (const auto& image_desc : stage.images) { + const auto tsharp = image_desc.GetSharp(stage); + if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + VideoCore::ImageInfo image_info{tsharp, image_desc}; + const auto image_id = texture_cache.FindImage(image_info); + auto& image = texture_cache.GetImage(image_id); + image.flags |= VideoCore::ImageFlagBits::Bound; + image_bindings.emplace_back(image_id, tsharp, image_desc); + } else { + image_bindings.emplace_back(VideoCore::ImageId{}, tsharp, image_desc); + } + + if (texture_cache.IsMeta(tsharp.Address())) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); + } + } + + // Second pass to re-bind images that were updated after binding + for (auto [image_id, tsharp, desc] : image_bindings) { + if (!image_id) { + if (instance.IsNullDescriptorSupported()) { + image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + } else { + auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + vk::ImageLayout::eGeneral); + } + } else { + auto& image = texture_cache.GetImage(image_id); + if (True(image.flags & VideoCore::ImageFlagBits::NeedsRebind)) { + image_id = texture_cache.FindImage(image.info); + } + VideoCore::ImageViewInfo view_info{tsharp, desc}; + auto& image_view = texture_cache.FindTexture(image_id, view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, + texture_cache.GetImage(image_id).last_state.layout); + image.flags &= + ~(VideoCore::ImageFlagBits::NeedsRebind | VideoCore::ImageFlagBits::Bound); + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = desc.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, + .pImageInfo = &image_infos.back(), + }); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h new file mode 100644 index 00000000..627ce389 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/info.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore { +class BufferCache; +class TextureCache; +} // namespace VideoCore + +namespace Vulkan { + +class Instance; +class Scheduler; +class DescriptorHeap; + +class Pipeline { +public: + Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + vk::PipelineCache pipeline_cache); + virtual ~Pipeline(); + + vk::Pipeline Handle() const noexcept { + return *pipeline; + } + + vk::PipelineLayout GetLayout() const noexcept { + return *pipeline_layout; + } + + using DescriptorWrites = boost::container::small_vector; + void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, + u32& binding, DescriptorWrites& set_writes) const; + +protected: + const Instance& instance; + Scheduler& scheduler; + DescriptorHeap& desc_heap; + vk::UniquePipeline pipeline; + vk::UniquePipelineLayout pipeline_layout; + vk::UniqueDescriptorSetLayout desc_layout; + static boost::container::static_vector image_infos; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index feadda96..6abd00aa 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -44,7 +44,6 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback( case 0xc81ad50e: case 0xb7c39078: case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE - case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error) return VK_FALSE; default: break; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23f60da1..eac27272 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -62,7 +62,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { buffer_cache.BindVertexBuffers(vs_info); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); - BeginRendering(); + BeginRendering(*pipeline); UpdateDynamicState(*pipeline); const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(); @@ -102,7 +102,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si buffer_cache.BindVertexBuffers(vs_info); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0); - BeginRendering(); + BeginRendering(*pipeline); UpdateDynamicState(*pipeline); const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true); @@ -179,7 +179,7 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::BeginRendering() { +void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) { const auto& regs = liverpool->regs; RenderState state; @@ -199,6 +199,13 @@ void Rasterizer::BeginRendering() { continue; } + // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform + // an unnecessary transition and may result in state conflict if the resource is already + // bound for reading. + if ((pipeline.GetMrtMask() & (1 << col_buf_id)) == 0) { + continue; + } + const auto& hint = liverpool->last_cb_extent[col_buf_id]; VideoCore::ImageInfo image_info{col_buf, hint}; VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/}; @@ -240,7 +247,7 @@ void Rasterizer::BeginRendering() { state.depth_image = image.image; state.depth_attachment = { .imageView = *image_view.image_view, - .imageLayout = image.layout, + .imageLayout = image.last_state.layout, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore, .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 5aa90c5c..bd05c8fa 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -52,7 +52,7 @@ public: void Finish(); private: - void BeginRendering(); + void BeginRendering(const GraphicsPipeline& pipeline); void UpdateDynamicState(const GraphicsPipeline& pipeline); void UpdateViewportScissorState(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b99dfdbb..08b5014e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -59,58 +59,6 @@ void Scheduler::EndRendering() { } is_rendering = false; current_cmdbuf.endRendering(); - - boost::container::static_vector barriers; - for (size_t i = 0; i < render_state.num_color_attachments; ++i) { - barriers.push_back(vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, - .newLayout = vk::ImageLayout::eColorAttachmentOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = render_state.color_images[i], - .subresourceRange = - { - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - } - if (render_state.has_depth || render_state.has_stencil) { - barriers.push_back(vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = render_state.depth_attachment.imageLayout, - .newLayout = render_state.depth_attachment.imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = render_state.depth_image, - .subresourceRange = - { - .aspectMask = vk::ImageAspectFlagBits::eDepth | - (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil - : vk::ImageAspectFlagBits::eNone), - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - } - - if (!barriers.empty()) { - const auto src_stages = - vk::PipelineStageFlagBits::eColorAttachmentOutput | - (render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests | - vk::PipelineStageFlagBits::eEarlyFragmentTests - : vk::PipelineStageFlagBits::eNone); - current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, - vk::DependencyFlagBits::eByRegion, {}, {}, barriers); - } } void Scheduler::Flush(SubmitInfo& info) { diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 9e8c38f0..4ce6e1ee 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #define VULKAN_HPP_NO_EXCEPTIONS +#include #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -124,7 +125,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - if (info.props.is_cube) { + if (info.props.is_cube || (info.type == vk::ImageType::e2D && info.resources.layers >= 6)) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; } else if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; @@ -179,52 +180,132 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, info.guest_size_bytes); } -void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::CommandBuffer cmdbuf) { - if (dst_layout == layout && dst_mask == access_mask) { - return; +boost::container::small_vector Image::GetBarriers( + vk::ImageLayout dst_layout, vk::Flags dst_mask, + vk::PipelineStageFlags2 dst_stage, std::optional subres_range) { + const bool needs_partial_transition = + subres_range && + (subres_range->base != SubresourceBase{} || subres_range->extent != info.resources); + const bool partially_transited = !subresource_states.empty(); + + boost::container::small_vector barriers{}; + if (needs_partial_transition || partially_transited) { + if (!partially_transited) { + subresource_states.resize(info.resources.levels * info.resources.layers); + std::fill(subresource_states.begin(), subresource_states.end(), last_state); + } + + // In case of partial transition, we need to change the specified subresources only. + // Otherwise all subresources need to be set to the same state so we can use a full + // resource transition for the next time. + const auto mips = + needs_partial_transition + ? std::ranges::views::iota(subres_range->base.level, + subres_range->base.level + subres_range->extent.levels) + : std::views::iota(0u, info.resources.levels); + const auto layers = + needs_partial_transition + ? std::ranges::views::iota(subres_range->base.layer, + subres_range->base.layer + subres_range->extent.layers) + : std::views::iota(0u, info.resources.layers); + + for (u32 mip : mips) { + for (u32 layer : layers) { + // NOTE: these loops may produce a lot of small barriers. + // If this becomes a problem, we can optimize it by merging adjacent barriers. + const auto subres_idx = mip * info.resources.layers + layer; + ASSERT(subres_idx < subresource_states.size()); + auto& state = subresource_states[subres_idx]; + + if (state.layout != dst_layout || state.access_mask != dst_mask) { + barriers.emplace_back(vk::ImageMemoryBarrier2{ + .srcStageMask = state.pl_stage, + .srcAccessMask = state.access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_mask, + .oldLayout = state.layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = mip, + .levelCount = 1, + .baseArrayLayer = layer, + .layerCount = 1, + }, + }); + state.layout = dst_layout; + state.access_mask = dst_mask; + state.pl_stage = dst_stage; + } + } + } + + if (!needs_partial_transition) { + subresource_states.clear(); + } + } else { // Full resource transition + if (last_state.layout == dst_layout && last_state.access_mask == dst_mask) { + return {}; + } + + barriers.emplace_back(vk::ImageMemoryBarrier2{ + .srcStageMask = last_state.pl_stage, + .srcAccessMask = last_state.access_mask, + .dstStageMask = dst_stage, + .dstAccessMask = dst_mask, + .oldLayout = last_state.layout, + .newLayout = dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); } - const vk::ImageMemoryBarrier barrier = { - .srcAccessMask = access_mask, - .dstAccessMask = dst_mask, - .oldLayout = layout, - .newLayout = dst_layout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + last_state.layout = dst_layout; + last_state.access_mask = dst_mask; + last_state.pl_stage = dst_stage; + return barriers; +} + +void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, + std::optional range, vk::CommandBuffer cmdbuf /*= {}*/) { // Adjust pipieline stage - const vk::PipelineStageFlags dst_pl_stage = - (dst_mask == vk::AccessFlagBits::eTransferRead || - dst_mask == vk::AccessFlagBits::eTransferWrite) - ? vk::PipelineStageFlagBits::eTransfer - : vk::PipelineStageFlagBits::eAllGraphics | vk::PipelineStageFlagBits::eComputeShader; + const vk::PipelineStageFlags2 dst_pl_stage = + (dst_mask == vk::AccessFlagBits2::eTransferRead || + dst_mask == vk::AccessFlagBits2::eTransferWrite) + ? vk::PipelineStageFlagBits2::eTransfer + : vk::PipelineStageFlagBits2::eAllGraphics | vk::PipelineStageFlagBits2::eComputeShader; + + const auto barriers = GetBarriers(dst_layout, dst_mask, dst_pl_stage, range); + if (barriers.empty()) { + return; + } if (!cmdbuf) { // When using external cmdbuf you are responsible for ending rp. scheduler->EndRendering(); cmdbuf = scheduler->CommandBuffer(); } - cmdbuf.pipelineBarrier(pl_stage, dst_pl_stage, vk::DependencyFlagBits::eByRegion, {}, {}, - barrier); - - layout = dst_layout; - access_mask = dst_mask; - pl_stage = dst_pl_stage; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); } void Image::Upload(vk::Buffer buffer, u64 offset) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); // Copy to the image. const auto aspect = aspect_mask & vk::ImageAspectFlagBits::eStencil @@ -248,12 +329,12 @@ void Image::Upload(vk::Buffer buffer, u64 offset) { cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } void Image::CopyImage(const Image& image) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); @@ -279,15 +360,16 @@ void Image::CopyImage(const Image& image) { .extent = {mip_w, mip_h, mip_d}, }); } - cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout, + image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } void Image::CopyMip(const Image& image, u32 mip) { scheduler->EndRendering(); - Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); @@ -313,10 +395,11 @@ void Image::CopyMip(const Image& image, u32 mip) { }, .extent = {mip_w, mip_h, mip_d}, }; - cmdbuf.copyImage(image.image, image.layout, this->image, this->layout, image_copy); + cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout, + image_copy); Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); } Image::~Image() = default; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 1bbb975b..02a48b6a 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -32,6 +32,8 @@ enum ImageFlagBits : u32 { Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered + Bound = 1 << 9, ///< True when the image is bound to a descriptor set + NeedsRebind = 1 << 10, ///< True when the image needs to be rebound }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -91,8 +93,11 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } - void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, - vk::CommandBuffer cmdbuf = {}); + boost::container::small_vector GetBarriers( + vk::ImageLayout dst_layout, vk::Flags dst_mask, + vk::PipelineStageFlags2 dst_stage, std::optional subres_range); + void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, + std::optional range, vk::CommandBuffer cmdbuf = {}); void Upload(vk::Buffer buffer, u64 offset); void CopyImage(const Image& image); @@ -111,10 +116,14 @@ struct Image { // Resource state tracking vk::ImageUsageFlags usage; - vk::Flags pl_stage = vk::PipelineStageFlagBits::eAllCommands; - vk::Flags access_mask = vk::AccessFlagBits::eNone; - vk::ImageLayout layout = vk::ImageLayout::eUndefined; - boost::container::small_vector mip_hashes; + struct State { + vk::Flags pl_stage = vk::PipelineStageFlagBits2::eAllCommands; + vk::Flags access_mask = vk::AccessFlagBits2::eNone; + vk::ImageLayout layout = vk::ImageLayout::eUndefined; + }; + State last_state{}; + std::vector subresource_states{}; + boost::container::small_vector mip_hashes{}; u64 tick_accessed_last{0}; }; diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 011e19db..521e4118 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -200,18 +200,12 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice mips_layout.emplace_back(depth_slice_sz, pitch, 0); } -ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) noexcept { +ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept { tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target - if (force_depth) { - if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) { - pixel_format = vk::Format::eD32SfloatS8Uint; - } else if (pixel_format == vk::Format::eR16Unorm) { - pixel_format = vk::Format::eD16UnormS8Uint; - } else { - UNREACHABLE(); - } + if (desc.is_depth) { + pixel_format = LiverpoolToVK::PromoteFormatToDepth(pixel_format); } type = ConvertImageType(image.GetType()); props.is_tiled = image.IsTiled(); @@ -224,7 +218,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n size.depth = props.is_volume ? image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); - resources.layers = image.NumLayers(); + resources.layers = image.NumLayers(desc.is_array); num_bits = NumBits(image.GetDataFmt()); usage.texture = true; diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index ba8985b8..2ae2547f 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -5,6 +5,7 @@ #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/texture_cache/types.h" @@ -19,7 +20,7 @@ struct ImageInfo { const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; - ImageInfo(const AmdGpu::Image& image, bool force_depth = false) noexcept; + ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; bool IsTiled() const { return tiling_mode != AmdGpu::TilingMode::Display_Linear; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index f94c1a37..2aad1afb 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -66,19 +67,40 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) { return format; } -ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept - : is_storage{is_storage_} { - type = ConvertImageViewType(image.GetType()); +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept + : is_storage{desc.is_storage} { const auto dfmt = image.GetDataFmt(); auto nfmt = image.GetNumberFmt(); if (is_storage && nfmt == AmdGpu::NumberFormat::Srgb) { nfmt = AmdGpu::NumberFormat::Unorm; } format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt); + if (desc.is_depth) { + format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format); + } range.base.level = image.base_level; range.base.layer = image.base_array; - range.extent.levels = image.last_level + 1; - range.extent.layers = image.last_array + 1; + range.extent.levels = image.last_level - image.base_level + 1; + range.extent.layers = image.last_array - image.base_array + 1; + type = ConvertImageViewType(image.GetType()); + + // Adjust view type for partial cubemaps and arrays + if (image.IsPartialCubemap()) { + type = vk::ImageViewType::e2DArray; + } + if (type == vk::ImageViewType::eCube) { + if (desc.is_array) { + type = vk::ImageViewType::eCubeArray; + } else { + // Some games try to bind an array of cubemaps while shader reads only single one. + range.extent.layers = std::min(range.extent.layers, 6u); + } + } + if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { + // Some games pass incorrect layer count for 3D textures so we need to fixup it. + range.extent.layers = 1; + } + if (!is_storage) { mapping.r = ConvertComponentSwizzle(image.dst_sel_x); mapping.g = ConvertComponentSwizzle(image.dst_sel_y); @@ -103,7 +125,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; - range.extent.layers = col_buffer.NumSlices(); + range.extent.layers = col_buffer.NumSlices() - range.base.layer; format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } @@ -115,7 +137,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, depth_buffer.stencil_info.format); is_storage = ctl.depth_write_enable; range.base.layer = view.slice_start; - range.extent.layers = view.NumSlices(); + range.extent.layers = view.NumSlices() - range.base.layer; } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, @@ -147,9 +169,9 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .subresourceRange{ .aspectMask = aspect, .baseMipLevel = info.range.base.level, - .levelCount = info.range.extent.levels - info.range.base.level, + .levelCount = info.range.extent.levels, .baseArrayLayer = info.range.base.layer, - .layerCount = info.range.extent.layers - info.range.base.layer, + .layerCount = info.range.extent.layers, }, }; image_view = instance.GetDevice().createImageViewUnique(image_view_ci); diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 7d53590d..ba8d2c72 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -3,6 +3,7 @@ #pragma once +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -17,7 +18,7 @@ namespace VideoCore { struct ImageViewInfo { ImageViewInfo() = default; - ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; + ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept; ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8621e95f..dfa1bab6 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -87,8 +87,7 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image auto new_image_id = slot_images.insert(instance, scheduler, requested_info); RegisterImage(new_image_id); - // auto& new_image = slot_images[new_image_id]; - // TODO: need to run a helper for depth copy here + // TODO: perform a depth copy here FreeImage(cache_image_id); return new_image_id; @@ -98,7 +97,11 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Image !requested_info.usage.depth_target && (requested_info.usage.texture || requested_info.usage.storage); if (cache_info.usage.depth_target && should_bind_as_texture) { - return cache_image_id; + if (cache_info.resources == requested_info.resources) { + return cache_image_id; + } else { + UNREACHABLE(); + } } return {}; @@ -154,7 +157,7 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_ if (tex_cache_image.info.IsMipOf(image_info)) { tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, - vk::AccessFlagBits::eTransferRead); + vk::AccessFlagBits2::eTransferRead, {}); const auto num_mips_to_copy = tex_cache_image.info.resources.levels; ASSERT(num_mips_to_copy == 1); @@ -176,9 +179,13 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { auto& src_image = slot_images[image_id]; auto& new_image = slot_images[new_image_id]; - src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); new_image.CopyImage(src_image); + if (True(src_image.flags & ImageFlagBits::Bound)) { + src_image.flags |= ImageFlagBits::NeedsRebind; + } + FreeImage(image_id); TrackImage(new_image_id); @@ -255,21 +262,21 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(info); +ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { Image& image = slot_images[image_id]; UpdateImage(image_id); auto& usage = image.info.usage; if (view_info.is_storage) { image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, + view_info.range); usage.storage = true; } else { const auto new_layout = image.info.IsDepthStencil() ? vk::ImageLayout::eDepthStencilReadOnlyOptimal : vk::ImageLayout::eShaderReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits::eShaderRead); + image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range); usage.texture = true; } @@ -284,8 +291,9 @@ ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, UpdateImage(image_id); image.Transit(vk::ImageLayout::eColorAttachmentOptimal, - vk::AccessFlagBits::eColorAttachmentWrite | - vk::AccessFlagBits::eColorAttachmentRead); + vk::AccessFlagBits2::eColorAttachmentWrite | + vk::AccessFlagBits2::eColorAttachmentRead, + view_info.range); // Register meta data for this color buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { @@ -330,8 +338,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, : vk::ImageLayout::eDepthAttachmentOptimal : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal : vk::ImageLayout::eDepthReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits::eDepthStencilAttachmentWrite | - vk::AccessFlagBits::eDepthStencilAttachmentRead); + image.Transit(new_layout, + vk::AccessFlagBits2::eDepthStencilAttachmentWrite | + vk::AccessFlagBits2::eDepthStencilAttachmentRead, + view_info.range); // Register meta data for this depth buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { @@ -404,7 +414,8 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule sched_ptr->EndRendering(); const auto cmdbuf = sched_ptr->CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); + image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}, + cmdbuf); const VAddr image_addr = image.info.guest_address; const size_t image_size = image.info.guest_size_bytes; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 329128a3..b2a8f13f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -59,9 +59,8 @@ public: /// Retrieves the image handle of the image with the provided attributes. [[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {}); - /// Retrieves an image view with the properties of the specified image descriptor. - [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info, - const ImageViewInfo& view_info); + /// Retrieves an image view with the properties of the specified image id. + [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); /// Retrieves the render target with specified properties [[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info,