video_core: Small fixes regarding GDS (#3942)

* shader_recompiler: Add missing descriptor type for GDS buffer

* liverpool: Implement GDS to memory store

* macOS fix?

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
TheTurtle
2026-01-22 17:05:16 +02:00
committed by GitHub
parent 508bad87d5
commit fecfbb6b4a
3 changed files with 33 additions and 14 deletions

View File

@@ -660,6 +660,7 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
inst.SetArg(1, ir.Imm32(binding));
} else {
// Convert shared memory opcode to the equivalent storage buffer operation (load, store or atomic) on the GDS buffer.
auto& buffer = info.buffers[binding];
const IR::U32 offset = IR::U32{inst.Arg(0)};
const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
@@ -705,27 +706,35 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
case IR::Opcode::SharedAtomicXor32:
inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
break;
case IR::Opcode::LoadSharedU16:
case IR::Opcode::LoadSharedU16: {
inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
buffer.used_types |= IR::Type::U16;
break;
}
case IR::Opcode::LoadSharedU32:
inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
break;
case IR::Opcode::LoadSharedU64:
case IR::Opcode::LoadSharedU64: {
inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
buffer.used_types |= IR::Type::U64;
break;
case IR::Opcode::WriteSharedU16:
}
case IR::Opcode::WriteSharedU16: {
ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
inst.Invalidate();
buffer.used_types |= IR::Type::U16;
break;
}
case IR::Opcode::WriteSharedU32:
ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
inst.Invalidate();
break;
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU64: {
ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
inst.Invalidate();
buffer.used_types |= IR::Type::U64;
break;
}
default:
UNREACHABLE();
}

View File

@@ -1057,8 +1057,12 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
}
case PM4ItOpcode::ReleaseMem: {
const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
release_mem->SignalFence([pipe_id = queue.pipe_id] {
release_mem->SignalFence(
[pipe_id = queue.pipe_id] {
Platform::IrqC::Instance()->Signal(static_cast<Platform::InterruptId>(pipe_id));
},
[this](VAddr dst, u16 gds_index, u16 num_dwords) {
rasterizer->CopyBuffer(dst, gds_index, num_dwords * sizeof(u32), false, true);
});
break;
}

View File

@@ -327,6 +327,7 @@ enum class DataSelect : u32 {
Data64 = 2,
GpuClock64 = 3,
PerfCounter = 4,
GdsMemStore = 5,
};
enum class InterruptSelect : u32 {
@@ -920,8 +921,9 @@ struct PM4CmdReleaseMem {
u32 data_hi;
template <typename T>
T* Address() const {
return reinterpret_cast<T*>(address_lo | u64(address_hi) << 32);
T Address() const {
u64 full_address = address_lo | (u64(address_hi) << 32);
return std::bit_cast<T>(full_address);
}
u32 DataDWord() const {
@@ -932,22 +934,26 @@ struct PM4CmdReleaseMem {
return data_lo | u64(data_hi) << 32;
}
void SignalFence(auto&& signal_irq) const {
void SignalFence(auto&& signal_irq, auto&& gds_to_mem) const {
switch (data_sel.Value()) {
case DataSelect::Data32Low: {
*Address<u32>() = DataDWord();
*Address<u32*>() = DataDWord();
break;
}
case DataSelect::Data64: {
*Address<u64>() = DataQWord();
*Address<u64*>() = DataQWord();
break;
}
case DataSelect::GpuClock64: {
*Address<u64>() = GetGpuClock64();
*Address<u64*>() = GetGpuClock64();
break;
}
case DataSelect::PerfCounter: {
*Address<u64>() = GetGpuPerfCounter();
*Address<u64*>() = GetGpuPerfCounter();
break;
}
case DataSelect::GdsMemStore: {
gds_to_mem(Address<VAddr>(), gds_index, num_dw);
break;
}
default: {