diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 53b161149..93129ac0e 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -660,6 +660,7 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info, inst.SetArg(1, ir.Imm32(binding)); } else { // Convert shared memory opcode to storage buffer atomic to GDS buffer. + auto& buffer = info.buffers[binding]; const IR::U32 offset = IR::U32{inst.Arg(0)}; const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1)); const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2)); @@ -705,27 +706,35 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info, case IR::Opcode::SharedAtomicXor32: inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {})); break; - case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedU16: { inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {})); + buffer.used_types |= IR::Type::U16; break; + } case IR::Opcode::LoadSharedU32: inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {})); break; - case IR::Opcode::LoadSharedU64: + case IR::Opcode::LoadSharedU64: { inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {})); + buffer.used_types |= IR::Type::U64; break; - case IR::Opcode::WriteSharedU16: + } + case IR::Opcode::WriteSharedU16: { ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {}); inst.Invalidate(); + buffer.used_types |= IR::Type::U16; break; + } case IR::Opcode::WriteSharedU32: ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {}); inst.Invalidate(); break; - case IR::Opcode::WriteSharedU64: + case IR::Opcode::WriteSharedU64: { ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {}); inst.Invalidate(); + buffer.used_types |= IR::Type::U64; break; + } default: UNREACHABLE(); } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 32ea1e8ed..b2a4d7a61 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -1057,9 +1057,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { } case PM4ItOpcode::ReleaseMem: { const auto* release_mem = reinterpret_cast(header); - release_mem->SignalFence([pipe_id = queue.pipe_id] { - Platform::IrqC::Instance()->Signal(static_cast(pipe_id)); - }); + release_mem->SignalFence( + [pipe_id = queue.pipe_id] { + Platform::IrqC::Instance()->Signal(static_cast(pipe_id)); + }, + [this](VAddr dst, u16 gds_index, u16 num_dwords) { + rasterizer->CopyBuffer(dst, gds_index, num_dwords * sizeof(u32), false, true); + }); break; } case PM4ItOpcode::EventWrite: { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 46ecb09d6..17511d0a2 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -327,6 +327,7 @@ enum class DataSelect : u32 { Data64 = 2, GpuClock64 = 3, PerfCounter = 4, + GdsMemStore = 5, }; enum class InterruptSelect : u32 { @@ -920,8 +921,9 @@ struct PM4CmdReleaseMem { u32 data_hi; template - T* Address() const { - return reinterpret_cast(address_lo | u64(address_hi) << 32); + T Address() const { + u64 full_address = address_lo | (u64(address_hi) << 32); + return std::bit_cast(full_address); } u32 DataDWord() const { @@ -932,22 +934,26 @@ struct PM4CmdReleaseMem { return data_lo | u64(data_hi) << 32; } - void SignalFence(auto&& signal_irq) const { + void SignalFence(auto&& signal_irq, auto&& gds_to_mem) const { switch (data_sel.Value()) { case DataSelect::Data32Low: { - *Address() = DataDWord(); + *Address() = DataDWord(); break; } case DataSelect::Data64: { - *Address() = DataQWord(); + *Address() = DataQWord(); break; } case DataSelect::GpuClock64: { - *Address() = GetGpuClock64(); + *Address() = GetGpuClock64(); break; } case DataSelect::PerfCounter: { - *Address() = GetGpuPerfCounter(); + *Address() = GetGpuPerfCounter(); + break; + } + case DataSelect::GdsMemStore: { + gds_to_mem(Address(), gds_index, num_dw); break; } default: {