mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-11-23 03:09:55 +00:00
renderer_vulkan: Commize and adjust buffer bindings (#1412)
* shader_recompiler: Implement finite cmp class * shader_recompiler: Implement more opcodes * renderer_vulkan: Commonize buffer binding * liverpool: More dma data impl * fix * copy_shader: Handle additional instructions from Knack * translator: Add V_CMPX_GE_I32
This commit is contained in:
parent
47ba6c6344
commit
87f8fea4de
@ -45,6 +45,10 @@ public:
|
||||
explicit AddressSpace();
|
||||
~AddressSpace();
|
||||
|
||||
[[nodiscard]] u8* BackingBase() const noexcept {
|
||||
return backing_base;
|
||||
}
|
||||
|
||||
[[nodiscard]] VAddr SystemManagedVirtualBase() noexcept {
|
||||
return reinterpret_cast<VAddr>(system_managed_base);
|
||||
}
|
||||
|
@ -54,6 +54,17 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size) {
|
||||
total_flexible_size, total_direct_size);
|
||||
}
|
||||
|
||||
bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
|
||||
const VAddr virtual_addr = std::bit_cast<VAddr>(address);
|
||||
const auto& vma = FindVMA(virtual_addr)->second;
|
||||
if (vma.type != VMAType::Direct) {
|
||||
return false;
|
||||
}
|
||||
u8* backing = impl.BackingBase() + vma.phys_base + (virtual_addr - vma.base);
|
||||
memcpy(backing, data, num_bytes);
|
||||
return true;
|
||||
}
|
||||
|
||||
PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment) {
|
||||
std::scoped_lock lk{mutex};
|
||||
|
||||
|
@ -149,6 +149,8 @@ public:
|
||||
return impl.SystemReservedVirtualBase();
|
||||
}
|
||||
|
||||
bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
|
||||
|
||||
void SetupMemoryRegions(u64 flexible_size);
|
||||
|
||||
PAddr PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment);
|
||||
|
@ -29,6 +29,14 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {
|
||||
sources[inst.dst[0].code] = inst.control.sopk.simm;
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::S_MOV_B32: {
|
||||
sources[inst.dst[0].code] = inst.src[0].code;
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::S_ADDK_I32: {
|
||||
sources[inst.dst[0].code] += inst.control.sopk.simm;
|
||||
break;
|
||||
}
|
||||
case Gcn::Opcode::EXP: {
|
||||
const auto& exp = inst.control.exp;
|
||||
const IR::Attribute semantic = static_cast<IR::Attribute>(exp.target);
|
||||
|
@ -92,8 +92,12 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||
break;
|
||||
case Opcode::S_BREV_B32:
|
||||
return S_BREV_B32(inst);
|
||||
case Opcode::S_BCNT1_I32_B64:
|
||||
return S_BCNT1_I32_B64(inst);
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
return S_AND_SAVEEXEC_B64(inst);
|
||||
return S_SAVEEXEC_B64(NegateMode::None, false, inst);
|
||||
case Opcode::S_ORN2_SAVEEXEC_B64:
|
||||
return S_SAVEEXEC_B64(NegateMode::Src1, true, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
@ -540,11 +544,17 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
|
||||
}
|
||||
|
||||
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
|
||||
const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
// SGPR we have a special IR opcode for SPGRs that act as thread masks.
|
||||
const IR::U1 exec{ir.GetExec()};
|
||||
IR::U1 exec{ir.GetExec()};
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
@ -568,7 +578,13 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
||||
}
|
||||
|
||||
// Update EXEC.
|
||||
const IR::U1 result = ir.LogicalAnd(exec, src);
|
||||
if (negate == NegateMode::Src1) {
|
||||
exec = ir.LogicalNot(exec);
|
||||
}
|
||||
IR::U1 result = is_or ? ir.LogicalOr(exec, src) : ir.LogicalAnd(exec, src);
|
||||
if (negate == NegateMode::Result) {
|
||||
result = ir.LogicalNot(result);
|
||||
}
|
||||
ir.SetExec(result);
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
@ -108,8 +108,9 @@ public:
|
||||
void S_MOV_B64(const GcnInst& inst);
|
||||
void S_NOT_B64(const GcnInst& inst);
|
||||
void S_BREV_B32(const GcnInst& inst);
|
||||
void S_BCNT1_I32_B64(const GcnInst& inst);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_AND_SAVEEXEC_B64(const GcnInst& inst);
|
||||
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
||||
|
||||
// SOPC
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
@ -225,6 +226,7 @@ public:
|
||||
void V_MED3_I32(const GcnInst& inst);
|
||||
void V_SAD(const GcnInst& inst);
|
||||
void V_SAD_U32(const GcnInst& inst);
|
||||
void V_CVT_PK_U16_U32(const GcnInst& inst);
|
||||
void V_CVT_PK_U8_F32(const GcnInst& inst);
|
||||
void V_LSHL_B64(const GcnInst& inst);
|
||||
void V_MUL_F64(const GcnInst& inst);
|
||||
|
@ -157,6 +157,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||
return V_RCP_F64(inst);
|
||||
case Opcode::V_RCP_IFLAG_F32:
|
||||
return V_RCP_F32(inst);
|
||||
case Opcode::V_RCP_CLAMP_F32:
|
||||
return V_RCP_F32(inst);
|
||||
case Opcode::V_RSQ_CLAMP_F32:
|
||||
return V_RSQ_F32(inst);
|
||||
case Opcode::V_RSQ_LEGACY_F32:
|
||||
@ -268,6 +270,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||
return V_CMP_U32(ConditionOp::GT, true, true, inst);
|
||||
case Opcode::V_CMPX_LG_I32:
|
||||
return V_CMP_U32(ConditionOp::LG, true, true, inst);
|
||||
case Opcode::V_CMPX_GE_I32:
|
||||
return V_CMP_U32(ConditionOp::GE, true, true, inst);
|
||||
|
||||
// V_CMP_{OP8}_U32
|
||||
case Opcode::V_CMP_F_U32:
|
||||
@ -355,6 +359,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
||||
return V_MED3_I32(inst);
|
||||
case Opcode::V_SAD_U32:
|
||||
return V_SAD_U32(inst);
|
||||
case Opcode::V_CVT_PK_U16_U32:
|
||||
return V_CVT_PK_U16_U32(inst);
|
||||
case Opcode::V_CVT_PK_U8_F32:
|
||||
return V_CVT_PK_U8_F32(inst);
|
||||
case Opcode::V_LSHL_B64:
|
||||
@ -1108,6 +1114,14 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.IAdd(result, src2));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false);
|
||||
const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false);
|
||||
SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <type_traits>
|
||||
#include "common/func_traits.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
@ -215,36 +216,17 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
template <u32 idx>
|
||||
bool IsArgImm(const IR::Inst& inst, u32 imm) {
|
||||
const IR::Value& arg = inst.Arg(idx);
|
||||
return arg.IsImmediate() && arg.U32() == imm;
|
||||
};
|
||||
|
||||
void FoldBooleanConvert(IR::Inst& inst) {
|
||||
// Eliminate pattern
|
||||
// %4 = <some bool>
|
||||
// %5 = SelectU32 %4, #1, #0 (uses: 2)
|
||||
// %8 = INotEqual %5, #0 (uses: 1)
|
||||
if (!IsArgImm<1>(inst, 0)) {
|
||||
return;
|
||||
}
|
||||
IR::Inst* prod = inst.Arg(0).TryInstRecursive();
|
||||
if (!prod || prod->GetOpcode() != IR::Opcode::SelectU32) {
|
||||
return;
|
||||
}
|
||||
if (IsArgImm<1>(*prod, 1) && IsArgImm<2>(*prod, 0)) {
|
||||
inst.ReplaceUsesWith(prod->Arg(0));
|
||||
}
|
||||
}
|
||||
|
||||
void FoldCmpClass(IR::Inst& inst) {
|
||||
void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
|
||||
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
||||
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
|
||||
inst.ReplaceOpcode(IR::Opcode::FPIsNan32);
|
||||
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
|
||||
inst.ReplaceOpcode(IR::Opcode::FPIsInf32);
|
||||
} else if ((class_mask & IR::FloatClassFunc::Finite) == IR::FloatClassFunc::Finite) {
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const IR::F32 value = IR::F32{inst.Arg(0)};
|
||||
inst.ReplaceUsesWith(ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value))));
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@ -276,7 +258,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
return;
|
||||
case IR::Opcode::FPCmpClass32:
|
||||
FoldCmpClass(inst);
|
||||
FoldCmpClass(block, inst);
|
||||
return;
|
||||
case IR::Opcode::ShiftLeftLogical32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return static_cast<u32>(a << b); });
|
||||
|
@ -605,7 +605,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
: IR::F32{};
|
||||
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
|
||||
|
||||
const auto new_inst = [&] -> IR::Value {
|
||||
auto new_inst = [&] -> IR::Value {
|
||||
if (inst_info.is_gather) {
|
||||
if (inst_info.is_depth) {
|
||||
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
|
||||
|
@ -24,6 +24,8 @@ enum class FloatClassFunc : u32 {
|
||||
|
||||
NaN = SignalingNan | QuietNan,
|
||||
Infinity = PositiveInfinity | NegativeInfinity,
|
||||
Finite = NegativeNormal | NegativeDenorm | NegativeZero | PositiveNormal | PositiveDenorm |
|
||||
PositiveZero,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "common/thread.h"
|
||||
#include "core/debug_state.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
#include "video_core/renderdoc.h"
|
||||
@ -504,7 +505,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEos: {
|
||||
const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
|
||||
event_eos->SignalFence();
|
||||
event_eos->SignalFence([](void* address, u64 data, u32 num_bytes) {
|
||||
auto* memory = Core::Memory::Instance();
|
||||
if (!memory->TryWriteBacking(address, &data, num_bytes)) {
|
||||
memcpy(address, &data, num_bytes);
|
||||
}
|
||||
});
|
||||
if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) {
|
||||
ASSERT(event_eos->size == 1);
|
||||
if (rasterizer) {
|
||||
@ -517,13 +523,42 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEop: {
|
||||
const auto* event_eop = reinterpret_cast<const PM4CmdEventWriteEop*>(header);
|
||||
event_eop->SignalFence();
|
||||
event_eop->SignalFence([](void* address, u64 data, u32 num_bytes) {
|
||||
auto* memory = Core::Memory::Instance();
|
||||
if (!memory->TryWriteBacking(address, &data, num_bytes)) {
|
||||
memcpy(address, &data, num_bytes);
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
|
||||
if (dma_data->dst_addr_lo == 0x3022C) {
|
||||
break;
|
||||
}
|
||||
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
|
||||
rasterizer->InlineDataToGds(dma_data->dst_addr_lo, dma_data->data);
|
||||
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32),
|
||||
true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Gds) {
|
||||
rasterizer->InlineData(dma_data->dst_addr_lo,
|
||||
dma_data->SrcAddress<const void*>(),
|
||||
dma_data->NumBytes(), true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data,
|
||||
sizeof(u32), false);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
LOG_WARNING(Render_Vulkan, "GDS memory read");
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
|
||||
dma_data->SrcAddress<const void*>(),
|
||||
dma_data->NumBytes(), false);
|
||||
} else {
|
||||
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
|
||||
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -631,6 +666,35 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
||||
};
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DmaData: {
|
||||
const auto* dma_data = reinterpret_cast<const PM4DmaData*>(header);
|
||||
if (dma_data->dst_addr_lo == 0x3022C) {
|
||||
break;
|
||||
}
|
||||
if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) {
|
||||
rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Gds) {
|
||||
rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
|
||||
dma_data->NumBytes(), true);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Data &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(), &dma_data->data, sizeof(u32),
|
||||
false);
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
LOG_WARNING(Render_Vulkan, "GDS memory read");
|
||||
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
|
||||
dma_data->dst_sel == DmaDataDst::Memory) {
|
||||
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
|
||||
dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
|
||||
false);
|
||||
} else {
|
||||
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
|
||||
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::AcquireMem: {
|
||||
break;
|
||||
}
|
||||
|
@ -313,25 +313,26 @@ struct PM4CmdEventWriteEop {
|
||||
return data_lo | u64(data_hi) << 32;
|
||||
}
|
||||
|
||||
void SignalFence() const {
|
||||
void SignalFence(auto&& write_mem) const {
|
||||
u32* address = Address<u32>();
|
||||
switch (data_sel.Value()) {
|
||||
case DataSelect::None: {
|
||||
break;
|
||||
}
|
||||
case DataSelect::Data32Low: {
|
||||
*Address<u32>() = DataDWord();
|
||||
write_mem(address, DataDWord(), sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case DataSelect::Data64: {
|
||||
*Address<u64>() = DataQWord();
|
||||
write_mem(address, DataQWord(), sizeof(u64));
|
||||
break;
|
||||
}
|
||||
case DataSelect::GpuClock64: {
|
||||
*Address<u64>() = GetGpuClock64();
|
||||
write_mem(address, GetGpuClock64(), sizeof(u64));
|
||||
break;
|
||||
}
|
||||
case DataSelect::PerfCounter: {
|
||||
*Address<u64>() = Common::FencedRDTSC();
|
||||
write_mem(address, Common::FencedRDTSC(), sizeof(u64));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@ -401,6 +402,20 @@ struct PM4DmaData {
|
||||
u32 dst_addr_lo;
|
||||
u32 dst_addr_hi;
|
||||
u32 command;
|
||||
|
||||
template <typename T>
|
||||
T SrcAddress() const {
|
||||
return std::bit_cast<T>(src_addr_lo | u64(src_addr_hi) << 32);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T DstAddress() const {
|
||||
return std::bit_cast<T>(dst_addr_lo | u64(dst_addr_hi) << 32);
|
||||
}
|
||||
|
||||
u32 NumBytes() const noexcept {
|
||||
return command & 0x1fffff;
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdWaitRegMem {
|
||||
@ -432,7 +447,7 @@ struct PM4CmdWaitRegMem {
|
||||
|
||||
template <typename T = u32*>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
|
||||
return std::bit_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
|
||||
}
|
||||
|
||||
bool Test() const {
|
||||
@ -534,11 +549,11 @@ struct PM4CmdEventWriteEos {
|
||||
return this->data;
|
||||
}
|
||||
|
||||
void SignalFence() const {
|
||||
void SignalFence(auto&& write_mem) const {
|
||||
const auto cmd = command.Value();
|
||||
switch (cmd) {
|
||||
case Command::SignalFence: {
|
||||
*Address() = DataDWord();
|
||||
write_mem(Address(), DataDWord(), sizeof(u32));
|
||||
break;
|
||||
}
|
||||
case Command::GdsStore: {
|
||||
|
@ -142,6 +142,7 @@ public:
|
||||
VAddr cpu_addr = 0;
|
||||
bool is_picked{};
|
||||
bool is_coherent{};
|
||||
bool is_deleted{};
|
||||
int stream_score = 0;
|
||||
size_t size_bytes = 0;
|
||||
std::span<u8> mapped_data;
|
||||
|
@ -20,7 +20,7 @@ static constexpr size_t StagingBufferSize = 1_GB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
||||
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
||||
PageManager& tracker_)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
||||
texture_cache{texture_cache_}, tracker{tracker_},
|
||||
@ -70,11 +70,10 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
|
||||
void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
|
||||
boost::container::small_vector<vk::BufferCopy, 1> copies;
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
memory_tracker.ForEachDownloadRange<true>(
|
||||
device_addr, size, [&](u64 device_addr_out, u64 range_size) {
|
||||
const VAddr buffer_addr = buffer.CpuAddr();
|
||||
const auto add_download = [&](VAddr start, VAddr end, u64) {
|
||||
const auto add_download = [&](VAddr start, VAddr end) {
|
||||
const u64 new_offset = start - buffer_addr;
|
||||
const u64 new_size = end - start;
|
||||
copies.push_back(vk::BufferCopy{
|
||||
@ -82,12 +81,10 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
|
||||
.dstOffset = total_size_bytes,
|
||||
.size = new_size,
|
||||
});
|
||||
// Align up to avoid cache conflicts
|
||||
constexpr u64 align = 64ULL;
|
||||
constexpr u64 mask = ~(align - 1ULL);
|
||||
total_size_bytes += (new_size + align - 1) & mask;
|
||||
largest_copy = std::max(largest_copy, new_size);
|
||||
total_size_bytes += new_size;
|
||||
};
|
||||
gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
|
||||
gpu_modified_ranges.Subtract(device_addr_out, range_size);
|
||||
});
|
||||
if (total_size_bytes == 0) {
|
||||
return;
|
||||
@ -181,6 +178,9 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
|
||||
.divisor = 1,
|
||||
});
|
||||
}
|
||||
if (ranges.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ranges::sort(ranges, [](const BufferRange& lhv, const BufferRange& rhv) {
|
||||
return lhv.base_address < rhv.base_address;
|
||||
@ -269,48 +269,62 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
|
||||
return regs.num_indices;
|
||||
}
|
||||
|
||||
void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) {
|
||||
ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned");
|
||||
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
||||
if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
|
||||
memcpy(std::bit_cast<void*>(address), value, num_bytes);
|
||||
return;
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const Buffer* buffer = [&] {
|
||||
if (is_gds) {
|
||||
return &gds_buffer;
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(address, num_bytes);
|
||||
return &slot_buffers[buffer_id];
|
||||
}();
|
||||
const vk::BufferMemoryBarrier2 buf_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.buffer = gds_buffer.Handle(),
|
||||
.offset = gds_offset,
|
||||
.size = sizeof(u32),
|
||||
.buffer = buffer->Handle(),
|
||||
.offset = buffer->Offset(address),
|
||||
.size = num_bytes,
|
||||
};
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &buf_barrier,
|
||||
});
|
||||
cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value);
|
||||
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
|
||||
bool is_texel_buffer) {
|
||||
bool is_texel_buffer, BufferId buffer_id) {
|
||||
// For small uniform buffers that have not been modified by gpu
|
||||
// use device local stream buffer to reduce renderpass breaks.
|
||||
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
|
||||
const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
|
||||
if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
|
||||
// For small uniform buffers that have not been modified by gpu
|
||||
// use device local stream buffer to reduce renderpass breaks.
|
||||
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
|
||||
return {&stream_buffer, offset};
|
||||
}
|
||||
|
||||
const BufferId buffer_id = FindBuffer(device_addr, size);
|
||||
if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
|
||||
buffer_id = FindBuffer(device_addr, size);
|
||||
}
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
|
||||
if (is_written) {
|
||||
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
|
||||
gpu_modified_ranges.Add(device_addr, size);
|
||||
}
|
||||
return {&buffer, buffer.Offset(device_addr)};
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) {
|
||||
const u64 page = gpu_addr >> CACHING_PAGEBITS;
|
||||
const BufferId buffer_id = page_table[page];
|
||||
if (buffer_id) {
|
||||
@ -474,7 +488,7 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
||||
DeleteBuffer(overlap_id, true);
|
||||
DeleteBuffer(overlap_id);
|
||||
}
|
||||
|
||||
BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
|
||||
@ -529,7 +543,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
VAddr buffer_start = buffer.CpuAddr();
|
||||
const auto add_copy = [&](VAddr device_addr_out, u64 range_size) {
|
||||
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
|
||||
copies.push_back(vk::BufferCopy{
|
||||
.srcOffset = total_size_bytes,
|
||||
.dstOffset = device_addr_out - buffer_start,
|
||||
@ -537,11 +551,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
});
|
||||
total_size_bytes += range_size;
|
||||
largest_copy = std::max(largest_copy, range_size);
|
||||
};
|
||||
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
|
||||
add_copy(device_addr_out, range_size);
|
||||
// Prevent uploading to gpu modified regions.
|
||||
// gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy);
|
||||
});
|
||||
SCOPE_EXIT {
|
||||
if (is_texel_buffer) {
|
||||
@ -654,14 +663,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
return true;
|
||||
}
|
||||
|
||||
void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
||||
// Mark the whole buffer as CPU written to stop tracking CPU writes
|
||||
if (!do_not_mark) {
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
|
||||
}
|
||||
void BufferCache::DeleteBuffer(BufferId buffer_id) {
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
Unregister(buffer_id);
|
||||
scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); });
|
||||
buffer.is_deleted = true;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "common/types.h"
|
||||
#include "video_core/buffer_cache/buffer.h"
|
||||
#include "video_core/buffer_cache/memory_tracker_base.h"
|
||||
#include "video_core/buffer_cache/range_set.h"
|
||||
#include "video_core/multi_level_page_table.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
@ -53,7 +54,7 @@ public:
|
||||
|
||||
public:
|
||||
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
const AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
|
||||
AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
|
||||
PageManager& tracker);
|
||||
~BufferCache();
|
||||
|
||||
@ -80,15 +81,16 @@ public:
|
||||
/// Bind host index buffer for the current draw.
|
||||
u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
|
||||
|
||||
/// Writes a value to GDS buffer.
|
||||
void InlineDataToGds(u32 gds_offset, u32 value);
|
||||
/// Writes a value to GPU buffer.
|
||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||
|
||||
/// Obtains a buffer for the specified region.
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
|
||||
bool is_texel_buffer = false);
|
||||
bool is_texel_buffer = false,
|
||||
BufferId buffer_id = {});
|
||||
|
||||
/// Obtains a temporary buffer for usage in texture cache.
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
|
||||
/// Attempts to obtain a buffer without modifying the cache contents.
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size);
|
||||
|
||||
/// Return true when a region is registered on the cache
|
||||
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
|
||||
@ -99,6 +101,8 @@ public:
|
||||
/// Return true when a CPU region is modified from the GPU
|
||||
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
||||
|
||||
[[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
|
||||
|
||||
private:
|
||||
template <typename Func>
|
||||
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
|
||||
@ -119,8 +123,6 @@ private:
|
||||
|
||||
void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
|
||||
|
||||
[[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
|
||||
|
||||
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
|
||||
|
||||
void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
|
||||
@ -138,11 +140,11 @@ private:
|
||||
|
||||
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
|
||||
|
||||
void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
|
||||
void DeleteBuffer(BufferId buffer_id);
|
||||
|
||||
const Vulkan::Instance& instance;
|
||||
Vulkan::Scheduler& scheduler;
|
||||
const AmdGpu::Liverpool* liverpool;
|
||||
AmdGpu::Liverpool* liverpool;
|
||||
TextureCache& texture_cache;
|
||||
PageManager& tracker;
|
||||
StreamBuffer staging_buffer;
|
||||
@ -150,6 +152,7 @@ private:
|
||||
Buffer gds_buffer;
|
||||
std::mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
vk::BufferView null_buffer_view;
|
||||
MemoryTracker memory_tracker;
|
||||
PageTable page_table;
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -113,140 +112,45 @@ ComputePipeline::~ComputePipeline() = default;
|
||||
bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
||||
VideoCore::TextureCache& texture_cache) const {
|
||||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
|
||||
BufferBarriers buffer_barriers;
|
||||
Shader::PushData push_data{};
|
||||
Shader::Backend::Bindings binding{};
|
||||
|
||||
info->PushUd(binding, push_data);
|
||||
|
||||
buffer_infos.clear();
|
||||
buffer_views.clear();
|
||||
image_infos.clear();
|
||||
|
||||
info->PushUd(binding, push_data);
|
||||
for (const auto& desc : info->buffers) {
|
||||
bool is_storage = true;
|
||||
if (desc.is_gds_buffer) {
|
||||
auto* vk_buffer = buffer_cache.GetGdsBuffer();
|
||||
buffer_infos.emplace_back(vk_buffer->Handle(), 0, vk_buffer->SizeBytes());
|
||||
} else {
|
||||
const auto vsharp = desc.GetSharp(*info);
|
||||
is_storage = desc.IsStorage(vsharp);
|
||||
const VAddr address = vsharp.base_address;
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||
// will need its full emulation anyways. For cases of metadata read a warning will be
|
||||
// logged.
|
||||
if (desc.is_written) {
|
||||
if (texture_cache.TouchMeta(address, true)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||
// will need its full emulation anyways. For cases of metadata read a warning will be logged.
|
||||
for (const auto& desc : info->texture_buffers) {
|
||||
const VAddr address = desc.GetSharp(*info).base_address;
|
||||
if (desc.is_written) {
|
||||
if (texture_cache.TouchMeta(address, true)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
const u32 size = vsharp.GetSize();
|
||||
const u32 alignment =
|
||||
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, desc.is_written);
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % 4 == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust);
|
||||
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
|
||||
: vk::DescriptorType::eUniformBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
||||
const auto null_buffer_view =
|
||||
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
|
||||
for (const auto& desc : info->texture_buffers) {
|
||||
const auto vsharp = desc.GetSharp(*info);
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
if (desc.is_written) {
|
||||
if (texture_cache.TouchMeta(address, true)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
}
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
|
||||
"Texel buffer stride must match format stride");
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
if (auto barrier =
|
||||
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead,
|
||||
vk::PipelineStageFlagBits2::eComputeShader)) {
|
||||
buffer_barriers.emplace_back(*barrier);
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemoryFromGPU(address, size);
|
||||
}
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes,
|
||||
buffer_barriers);
|
||||
|
||||
BindTextures(texture_cache, *info, binding, set_writes);
|
||||
|
||||
for (const auto& sampler : info->samplers) {
|
||||
const auto ssharp = sampler.GetSharp(*info);
|
||||
if (ssharp.force_degamma) {
|
||||
LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
|
||||
}
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampler,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
|
||||
if (set_writes.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
|
||||
if (!buffer_barriers.empty()) {
|
||||
const auto dependencies = vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
@ -257,21 +161,22 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
|
||||
cmdbuf.pipelineBarrier2(dependencies);
|
||||
}
|
||||
|
||||
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
|
||||
&push_data);
|
||||
|
||||
// Bind descriptor set.
|
||||
if (uses_push_descriptors) {
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
|
||||
set_writes);
|
||||
} else {
|
||||
const auto desc_set = desc_heap.Commit(*desc_layout);
|
||||
for (auto& set_write : set_writes) {
|
||||
set_write.dstSet = desc_set;
|
||||
}
|
||||
instance.GetDevice().updateDescriptorSets(set_writes, {});
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set,
|
||||
{});
|
||||
return true;
|
||||
}
|
||||
const auto desc_set = desc_heap.Commit(*desc_layout);
|
||||
for (auto& set_write : set_writes) {
|
||||
set_write.dstSet = desc_set;
|
||||
}
|
||||
instance.GetDevice().updateDescriptorSets(set_writes, {});
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, {});
|
||||
|
||||
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
|
||||
&push_data);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
@ -384,13 +384,13 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
||||
VideoCore::BufferCache& buffer_cache,
|
||||
VideoCore::TextureCache& texture_cache) const {
|
||||
// Bind resource buffers and textures.
|
||||
boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
|
||||
boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
|
||||
BufferBarriers buffer_barriers;
|
||||
Shader::PushData push_data{};
|
||||
Shader::Backend::Bindings binding{};
|
||||
|
||||
buffer_infos.clear();
|
||||
buffer_views.clear();
|
||||
image_infos.clear();
|
||||
|
||||
for (const auto* stage : stages) {
|
||||
@ -402,111 +402,22 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
||||
push_data.step1 = regs.vgt_instance_step_rate_1;
|
||||
}
|
||||
stage->PushUd(binding, push_data);
|
||||
for (const auto& buffer : stage->buffers) {
|
||||
const auto vsharp = buffer.GetSharp(*stage);
|
||||
const bool is_storage = buffer.IsStorage(vsharp);
|
||||
if (vsharp && vsharp.GetSize() > 0) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
|
||||
}
|
||||
const u32 size = vsharp.GetSize();
|
||||
const u32 alignment =
|
||||
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, buffer.is_written);
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % 4 == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust);
|
||||
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
|
||||
} else {
|
||||
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
|
||||
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
|
||||
: vk::DescriptorType::eUniformBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
||||
const auto null_buffer_view =
|
||||
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
|
||||
for (const auto& desc : stage->texture_buffers) {
|
||||
const auto vsharp = desc.GetSharp(*stage);
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
|
||||
const u32 size = vsharp.GetSize();
|
||||
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
|
||||
const VAddr address = vsharp.base_address;
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] =
|
||||
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
|
||||
"Texel buffer stride must match format stride");
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
|
||||
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead;
|
||||
if (auto barrier = vk_buffer->GetBarrier(
|
||||
dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
|
||||
buffer_barriers.emplace_back(*barrier);
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemoryFromGPU(address, size);
|
||||
}
|
||||
}
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data, set_writes,
|
||||
buffer_barriers);
|
||||
|
||||
BindTextures(texture_cache, *stage, binding, set_writes);
|
||||
|
||||
for (const auto& sampler : stage->samplers) {
|
||||
auto ssharp = sampler.GetSharp(*stage);
|
||||
if (ssharp.force_degamma) {
|
||||
LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
|
||||
}
|
||||
if (sampler.disable_aniso) {
|
||||
const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage);
|
||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||
ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
|
||||
}
|
||||
}
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampler,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
SCOPE_EXIT {
|
||||
cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
|
||||
};
|
||||
|
||||
if (set_writes.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!buffer_barriers.empty()) {
|
||||
const auto dependencies = vk::DependencyInfo{
|
||||
@ -518,22 +429,18 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
||||
cmdbuf.pipelineBarrier2(dependencies);
|
||||
}
|
||||
|
||||
if (!set_writes.empty()) {
|
||||
if (uses_push_descriptors) {
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
|
||||
set_writes);
|
||||
} else {
|
||||
const auto desc_set = desc_heap.Commit(*desc_layout);
|
||||
for (auto& set_write : set_writes) {
|
||||
set_write.dstSet = desc_set;
|
||||
}
|
||||
instance.GetDevice().updateDescriptorSets(set_writes, {});
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
|
||||
desc_set, {});
|
||||
}
|
||||
// Bind descriptor set.
|
||||
if (uses_push_descriptors) {
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
|
||||
set_writes);
|
||||
return;
|
||||
}
|
||||
cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
|
||||
const auto desc_set = desc_heap.Commit(*desc_layout);
|
||||
for (auto& set_write : set_writes) {
|
||||
set_write.dstSet = desc_set;
|
||||
}
|
||||
instance.GetDevice().updateDescriptorSets(set_writes, {});
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, desc_set, {});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
@ -12,6 +13,8 @@
|
||||
namespace Vulkan {
|
||||
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 32> Pipeline::image_infos;
|
||||
boost::container::static_vector<vk::BufferView, 8> Pipeline::buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> Pipeline::buffer_infos;
|
||||
|
||||
Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
|
||||
vk::PipelineCache pipeline_cache)
|
||||
@ -19,12 +22,133 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH
|
||||
|
||||
Pipeline::~Pipeline() = default;
|
||||
|
||||
void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache,
|
||||
VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
|
||||
Shader::Backend::Bindings& binding, Shader::PushData& push_data,
|
||||
DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const {
|
||||
using BufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
|
||||
static boost::container::static_vector<BufferBindingInfo, 32> buffer_bindings;
|
||||
|
||||
buffer_bindings.clear();
|
||||
|
||||
for (const auto& desc : stage.buffers) {
|
||||
const auto vsharp = desc.GetSharp(stage);
|
||||
if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
|
||||
const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
|
||||
buffer_bindings.emplace_back(buffer_id, vsharp);
|
||||
} else {
|
||||
buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
|
||||
}
|
||||
}
|
||||
|
||||
using TexBufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
|
||||
static boost::container::static_vector<TexBufferBindingInfo, 32> texbuffer_bindings;
|
||||
|
||||
texbuffer_bindings.clear();
|
||||
|
||||
for (const auto& desc : stage.texture_buffers) {
|
||||
const auto vsharp = desc.GetSharp(stage);
|
||||
if (vsharp.base_address != 0 && vsharp.GetSize() > 0 &&
|
||||
vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
|
||||
const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
|
||||
texbuffer_bindings.emplace_back(buffer_id, vsharp);
|
||||
} else {
|
||||
texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass to re-bind buffers that were updated after binding
|
||||
for (u32 i = 0; i < buffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = buffer_bindings[i];
|
||||
const auto& desc = stage.buffers[i];
|
||||
const bool is_storage = desc.IsStorage(vsharp);
|
||||
if (!buffer_id) {
|
||||
if (desc.is_gds_buffer) {
|
||||
const auto* gds_buf = buffer_cache.GetGdsBuffer();
|
||||
buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
|
||||
} else {
|
||||
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
|
||||
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
} else {
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
|
||||
vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id);
|
||||
const u32 alignment =
|
||||
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % 4 == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust);
|
||||
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
|
||||
vsharp.GetSize() + adjust);
|
||||
}
|
||||
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
|
||||
: vk::DescriptorType::eUniformBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
||||
const auto null_buffer_view =
|
||||
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
|
||||
for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
|
||||
const auto& desc = stage.texture_buffers[i];
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
|
||||
if (buffer_id) {
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
|
||||
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
|
||||
"Texel buffer stride must match format stride");
|
||||
const u32 offset_aligned = Common::AlignDown(offset, alignment);
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
|
||||
buffer_view =
|
||||
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
if (auto barrier =
|
||||
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead,
|
||||
vk::PipelineStageFlagBits2::eComputeShader)) {
|
||||
buffer_barriers.emplace_back(*barrier);
|
||||
}
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
|
||||
}
|
||||
}
|
||||
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
}
|
||||
|
||||
void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
|
||||
Shader::Backend::Bindings& binding,
|
||||
DescriptorWrites& set_writes) const {
|
||||
|
||||
using ImageBindingInfo = std::tuple<VideoCore::ImageId, AmdGpu::Image, Shader::ImageResource>;
|
||||
boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
|
||||
static boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
|
||||
|
||||
image_bindings.clear();
|
||||
|
||||
for (const auto& image_desc : stage.images) {
|
||||
const auto tsharp = image_desc.GetSharp(stage);
|
||||
@ -76,6 +200,26 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
|
||||
for (const auto& sampler : stage.samplers) {
|
||||
auto ssharp = sampler.GetSharp(stage);
|
||||
if (sampler.disable_aniso) {
|
||||
const auto& tsharp = stage.images[sampler.associated_image].GetSharp(stage);
|
||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||
ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
|
||||
}
|
||||
}
|
||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampler,
|
||||
.pImageInfo = &image_infos.back(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -33,6 +33,13 @@ public:
|
||||
}
|
||||
|
||||
using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
|
||||
using BufferBarriers = boost::container::small_vector<vk::BufferMemoryBarrier2, 16>;
|
||||
|
||||
void BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache,
|
||||
const Shader::Info& stage, Shader::Backend::Bindings& binding,
|
||||
Shader::PushData& push_data, DescriptorWrites& set_writes,
|
||||
BufferBarriers& buffer_barriers) const;
|
||||
|
||||
void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
|
||||
Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const;
|
||||
|
||||
@ -44,6 +51,8 @@ protected:
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
vk::UniqueDescriptorSetLayout desc_layout;
|
||||
static boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||
static boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
static boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -98,10 +98,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
|
||||
|
||||
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
|
||||
buffer_cache.BindVertexBuffers(vs_info);
|
||||
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0);
|
||||
buffer_cache.BindIndexBuffer(is_indexed, 0);
|
||||
|
||||
const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true);
|
||||
const auto total_offset = base + offset;
|
||||
const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
|
||||
|
||||
BeginRendering(*pipeline);
|
||||
UpdateDynamicState(*pipeline);
|
||||
@ -110,9 +109,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
|
||||
// instance offsets will be automatically applied by Vulkan from indirect args buffer.
|
||||
|
||||
if (is_indexed) {
|
||||
cmdbuf.drawIndexedIndirect(buffer->Handle(), total_offset, 1, 0);
|
||||
cmdbuf.drawIndexedIndirect(buffer->Handle(), base, 1, 0);
|
||||
} else {
|
||||
cmdbuf.drawIndirect(buffer->Handle(), total_offset, 1, 0);
|
||||
cmdbuf.drawIndirect(buffer->Handle(), base, 1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -161,9 +160,8 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
|
||||
|
||||
scheduler.EndRendering();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||
const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true);
|
||||
const auto total_offset = base + offset;
|
||||
cmdbuf.dispatchIndirect(buffer->Handle(), total_offset);
|
||||
const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
|
||||
cmdbuf.dispatchIndirect(buffer->Handle(), base);
|
||||
}
|
||||
|
||||
u64 Rasterizer::Flush() {
|
||||
@ -260,8 +258,8 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) {
|
||||
scheduler.BeginRendering(state);
|
||||
}
|
||||
|
||||
void Rasterizer::InlineDataToGds(u32 gds_offset, u32 value) {
|
||||
buffer_cache.InlineDataToGds(gds_offset, value);
|
||||
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||
buffer_cache.InlineData(address, value, num_bytes, is_gds);
|
||||
}
|
||||
|
||||
u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
|
||||
|
@ -42,7 +42,7 @@ public:
|
||||
void ScopedMarkerInsert(const std::string_view& str);
|
||||
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color);
|
||||
|
||||
void InlineDataToGds(u32 gds_offset, u32 value);
|
||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||
u32 ReadDataFromGds(u32 gsd_offset);
|
||||
void InvalidateMemory(VAddr addr, u64 size);
|
||||
void MapMemory(VAddr addr, u64 size);
|
||||
|
@ -8,6 +8,9 @@
|
||||
namespace VideoCore {
|
||||
|
||||
Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler) {
|
||||
if (sampler.force_degamma) {
|
||||
LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
|
||||
}
|
||||
using namespace Vulkan;
|
||||
const vk::SamplerCreateInfo sampler_ci = {
|
||||
.magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter),
|
||||
|
@ -427,7 +427,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
|
||||
const VAddr image_addr = image.info.guest_address;
|
||||
const size_t image_size = image.info.guest_size_bytes;
|
||||
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
|
||||
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size);
|
||||
// The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
|
||||
// hazard
|
||||
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
|
Loading…
Reference in New Issue
Block a user