GPUDevice: Add compute shader support

This commit is contained in:
Stenzek 2024-11-20 20:44:37 +10:00
parent 378fd80e3d
commit c21ea3c85b
No known key found for this signature in database
18 changed files with 577 additions and 122 deletions

View File

@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders =
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
m_features.partial_msaa_resolve = false;
m_features.memory_import = false;
m_features.explicit_present = false;
@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += data_size;
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
DebugAssert(res.index_aligned == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
}
void* D3D11Device::MapUniformBuffer(u32 size)
@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += size;
BindUniformBuffer(pos, req_size);
}
void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
{
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = pos / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
const UINT first_constant = offset / 16u;
const UINT num_constants = size / 16u;
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
}
else
{
DebugAssert(pos == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
DebugAssert(offset == 0);
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
else
{
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
}
}
@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
if (!m_current_compute_shader)
{
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
}
else
{
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
}
}
else
{
@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
{
m_current_textures[slot] = T;
m_context->PSSetShaderResources(slot, 1, &T);
if (m_current_compute_shader)
m_context->CSSetShaderResources(slot, 1, &T);
}
if (m_current_samplers[slot] != S)
{
m_current_samplers[slot] = S;
m_context->PSSetSamplers(slot, 1, &S);
if (m_current_compute_shader)
m_context->CSSetSamplers(slot, 1, &S);
}
}
@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
if (m_current_textures[slot] != B)
{
m_current_textures[slot] = B;
// Compute doesn't support texture buffers, yet...
m_context->PSSetShaderResources(slot, 1, &B);
}
}
@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->Draw(vertex_count, base_vertex);
}
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->DrawIndexed(index_count, base_index, base_vertex);
}
@ -1129,3 +1151,10 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}
// Dispatches a compute workload using the currently-bound compute pipeline.
// Thread-group counts are forwarded unchanged to the D3D11 device context.
void D3D11Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
// A compute pipeline must have been bound via SetPipeline() first.
DebugAssert(m_current_compute_shader);
s_stats.num_draws++; // dispatches are counted alongside draws in the frame stats
m_context->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z);
}

View File

@ -75,6 +75,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -98,6 +99,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@ -140,6 +142,8 @@ private:
bool CreateBuffers();
void DestroyBuffers();
void BindUniformBuffer(u32 offset, u32 size);
void UnbindComputePipeline();
bool IsRenderTargetBound(const D3D11Texture* tex) const;
@ -180,6 +184,7 @@ private:
ID3D11VertexShader* m_current_vertex_shader = nullptr;
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
ID3D11PixelShader* m_current_pixel_shader = nullptr;
ID3D11ComputeShader* m_current_compute_shader = nullptr;
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
ID3D11DepthStencilState* m_current_depth_state = nullptr;
ID3D11BlendState* m_current_blend_state = nullptr;

View File

@ -3,6 +3,7 @@
#include "d3d11_pipeline.h"
#include "d3d11_device.h"
#include "d3d11_texture.h"
#include "d3d_common.h"
#include "common/assert.h"
@ -121,10 +122,10 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11DeviceChild> ps_or_cs,
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride),
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
{
}
@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c
return h;
}
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error)
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts,
Error* error)
{
ComPtr<ID3D11BlendState> dbs;
@ -365,69 +367,124 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
}
// Creates a compute-only pipeline object. Compute pipelines carry no
// fixed-function state, so every graphics-related slot in D3D11Pipeline
// (rasterizer/depth/blend state, input layout, VS/GS) is left null.
std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  const D3D11Shader* const shader = static_cast<const D3D11Shader*>(config.compute_shader);
  if (!shader) [[unlikely]]
  {
    Error::SetStringView(error, "Missing compute shader.");
    return {};
  }

  return std::unique_ptr<GPUPipeline>(new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                                                        shader->GetComputeShader(),
                                                        D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
{
if (m_current_pipeline == pipeline)
return;
const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline();
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
m_current_pipeline = PL;
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
if (!PL->IsComputePipeline())
{
m_current_input_layout = il;
m_context->IASetInputLayout(il);
}
if (was_compute)
UnbindComputePipeline();
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
{
const UINT offset = 0;
m_current_vertex_stride = PL->GetVertexStride();
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
}
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
{
m_current_input_layout = il;
m_context->IASetInputLayout(il);
}
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
{
m_current_primitive_topology = topology;
m_context->IASetPrimitiveTopology(topology);
}
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
{
const UINT offset = 0;
m_current_vertex_stride = PL->GetVertexStride();
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
}
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
{
m_current_vertex_shader = vs;
m_context->VSSetShader(vs, nullptr, 0);
}
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
{
m_current_primitive_topology = topology;
m_context->IASetPrimitiveTopology(topology);
}
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
{
m_current_vertex_shader = vs;
m_context->VSSetShader(vs, nullptr, 0);
}
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
m_context->PSSetShader(ps, nullptr, 0);
}
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
{
m_current_rasterizer_state = rs;
m_context->RSSetState(rs);
}
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
m_context->PSSetShader(ps, nullptr, 0);
}
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
}
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
{
m_current_rasterizer_state = rs;
m_context->RSSetState(rs);
}
if (ID3D11BlendState* bs = PL->GetBlendState();
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
}
if (ID3D11BlendState* bs = PL->GetBlendState();
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
{
m_current_blend_state = bs;
m_current_blend_factor = PL->GetBlendFactor();
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
}
}
else
{
m_current_blend_state = bs;
m_current_blend_factor = PL->GetBlendFactor();
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader)
{
m_current_compute_shader = cs;
m_context->CSSetShader(cs, nullptr, 0);
}
if (!was_compute)
{
// need to bind all SRVs/samplers
u32 count;
for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++)
{
if (!m_current_textures[count])
break;
}
if (count > 0)
{
m_context->CSSetShaderResources(0, count, m_current_textures.data());
m_context->CSSetSamplers(0, count, m_current_samplers.data());
}
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS];
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargets(0, nullptr, nullptr);
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr);
}
}
}
}
@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl)
if (m_current_pipeline != pl)
return;
if (pl->IsComputePipeline())
UnbindComputePipeline();
// Let the runtime deal with the dead objects...
m_current_pipeline = nullptr;
}
void D3D11Device::UnbindComputePipeline()
{
m_current_compute_shader = nullptr;
ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {};
ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {};
ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {};
m_context->CSSetShader(nullptr, nullptr, 0);
m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs);
m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers);
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr);
}

View File

@ -51,13 +51,18 @@ public:
void SetDebugName(std::string_view name) override;
ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; }
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast<ID3D11PixelShader*>(m_ps_or_cs.Get()); }
ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const
{
return static_cast<ID3D11ComputeShader*>(m_ps_or_cs.Get());
}
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
@ -66,7 +71,8 @@ public:
private:
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
ComPtr<ID3D11DeviceChild> ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
u32 blend_factor);
ComPtr<ID3D11RasterizerState> m_rs;
ComPtr<ID3D11DepthStencilState> m_ds;
@ -74,7 +80,7 @@ private:
ComPtr<ID3D11InputLayout> m_il;
ComPtr<ID3D11VertexShader> m_vs;
ComPtr<ID3D11GeometryShader> m_gs;
ComPtr<ID3D11PixelShader> m_ps;
ComPtr<ID3D11DeviceChild> m_ps_or_cs;
D3D11_PRIMITIVE_TOPOLOGY m_topology;
u32 m_vertex_stride;
u32 m_blend_factor;

View File

@ -115,6 +115,8 @@ public:
ComputePipelineBuilder();
~ComputePipelineBuilder() = default;
ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; }
void Clear();
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);

View File

@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = false;
m_features.explicit_present = true;
@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
1, // SingleTextureBufferAndPushConstants
0, // MultiTextureAndUBO
2, // MultiTextureAndPushConstants
2, // ComputeSingleTextureAndPushConstants
};
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
const u32 push_param =
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (!IsUsingComputeRootSignature())
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
else
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
}
void* D3D12Device::MapUniformBuffer(u32 size)
@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error)
}
}
{
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
}
return true;
}
@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass()
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
rt->SetState(GPUTexture::State::Dirty);
}
}
if (m_current_depth_target)
@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck()
BeginRenderPass();
}
// Prepares command-list state before a compute Dispatch(): ends any open
// render pass, transitions bound textures/RT-images to the states compute
// expects, and flushes dirty root-signature/descriptor state. Mirrors
// PreDrawCheck() but for the compute path.
void D3D12Device::PreDispatchCheck()
{
// Compute work cannot run inside a render pass.
if (InRenderPass())
EndRenderPass();
// Transition images.
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
// Render targets bound as images are written by the CS through UAVs, so
// they need the UAV state and any pending clears committed first.
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
{
// Still need to clear the RTs.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
rt->SetState(GPUTexture::State::Dirty);
}
}
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
SetInitialPipelineState();
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
// Consume the dirty flags atomically with respect to the re-entrant retry
// below: std::exchange clears them before we attempt to update parameters.
const u32 dirty = std::exchange(m_dirty_flags, 0);
if (dirty != 0)
{
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
{
UpdateRootSignature();
if (!UpdateRootParameters(dirty))
{
// Out of descriptor heap space: submit, start a fresh command list,
// and retry the whole check recursively.
SubmitCommandList(false, "out of descriptors");
PreDispatchCheck();
return;
}
}
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
{
if (!UpdateRootParameters(dirty))
{
SubmitCommandList(false, "out of descriptors");
PreDispatchCheck();
return;
}
}
}
}
// True when render targets are bound as images (rasterizer-order-view /
// UAV path), which selects the alternate root-signature set.
bool D3D12Device::IsUsingROVRootSignature() const
{
  return (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0;
}
// True when the current pipeline layout is a compute layout. Relies on all
// compute layouts being ordered after the graphics layouts in the enum,
// with ComputeSingleTextureAndPushConstants as the first of them.
bool D3D12Device::IsUsingComputeRootSignature() const
{
  return m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
}
void D3D12Device::UpdateRootSignature()
{
GetCommandList()->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (!IsUsingComputeRootSignature())
{
cmdlist->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
}
else
{
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
}
}
template<GPUPipeline::Layout layout>
@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
}
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
return false;
}
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
}
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
1 :
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
2);
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
}
return true;
@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
default:
UnreachableCode();
}
@ -2331,3 +2435,10 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}
// Dispatches a compute workload. PreDispatchCheck() ends any open render
// pass, performs resource transitions, and flushes dirty descriptor state
// before the dispatch is recorded.
void D3D12Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
PreDispatchCheck();
s_stats.num_draws++; // dispatches are counted alongside draws in the frame stats
GetCommandList()->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z);
}

View File

@ -96,6 +96,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -119,6 +120,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@ -275,8 +277,10 @@ private:
ID3D12RootSignature* GetCurrentRootSignature() const;
void SetInitialPipelineState();
void PreDrawCheck();
void PreDispatchCheck();
bool IsUsingROVRootSignature() const;
bool IsUsingComputeRootSignature() const;
void UpdateRootSignature();
template<GPUPipeline::Layout layout>
bool UpdateParametersForLayout(u32 dirty);

View File

@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
return SHA1Digest::DigestToString(digest);
}
std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config)
{
SHA1Digest hash;
hash.Update(&config.layout, sizeof(config.layout));
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.compute_shader))
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
u8 digest[SHA1Digest::DIGEST_SIZE];
hash.Final(digest);
return SHA1Digest::DigestToString(digest);
}
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
{
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
@ -274,3 +286,46 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
}
// Creates a compute pipeline, consulting the on-disk pipeline library first
// when one is available. Returns null and sets |error| on failure.
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  // Guard before dereferencing the shader, matching the D3D11 backend's
  // CreatePipeline(ComputeConfig) behaviour.
  if (!config.compute_shader) [[unlikely]]
  {
    Error::SetStringView(error, "Missing compute shader.");
    return {};
  }

  const D3D12Shader* const shader = static_cast<const D3D12Shader*>(config.compute_shader);

  D3D12::ComputePipelineBuilder cpb;
  cpb.SetRootSignature(m_root_signatures[0][static_cast<u8>(config.layout)].Get());
  cpb.SetShader(shader->GetBytecodeData(), shader->GetBytecodeSize());

  ComPtr<ID3D12PipelineState> pipeline;
  if (m_pipeline_library)
  {
    // Try the cached pipeline library first; fall back to a normal create on miss.
    const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config));
    HRESULT hr =
      m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
    if (FAILED(hr))
    {
      // E_INVALIDARG = not found.
      if (hr != E_INVALIDARG)
        ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));

      // Need to create it normally.
      pipeline = cpb.Create(m_device.Get(), error, false);

      // Store if it wasn't an OOM or something else.
      if (pipeline && hr == E_INVALIDARG)
      {
        hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
        if (FAILED(hr))
          ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
      }
    }
  }
  else
  {
    pipeline = cpb.Create(m_device.Get(), error, false);
  }

  if (!pipeline)
    return {};

  return std::unique_ptr<GPUPipeline>(
    new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}

View File

@ -51,6 +51,7 @@ public:
void SetDebugName(std::string_view name) override;
static std::string GetPipelineName(const GraphicsConfig& config);
static std::string GetPipelineName(const ComputeConfig& config);
private:
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,

View File

@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
// Need to know if there's UBOs for mapping.
const spvc_reflected_resource *ubos, *textures;
size_t ubos_count, textures_count;
size_t ubos_count, textures_count, images_count;
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
&ubos_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
&textures, &textures_count)) != SPVC_SUCCESS)
&textures, &textures_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
&textures, &images_count)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
return {};
@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
[[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2;
switch (target_language)
{
@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
}
}
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < images_count; i++)
{
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
.desc_set = IMAGE_DESCRIPTOR_SET,
.binding = i,
.cbv = {},
.uav = {.register_space = 0, .register_binding = i},
.srv = {},
.sampler = {}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
}
}
}
break;
#endif
@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
if (stage == GPUShaderStage::Fragment)
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
.msl_buffer = 0,
.msl_texture = 0,
.msl_sampler = 0};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
static_cast<int>(sres));
return {};
}
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment,
.desc_set = 1,
const spvc_msl_resource_binding rb = {.stage = execmodel,
.desc_set = TEXTURE_DESCRIPTOR_SET,
.binding = i,
.msl_buffer = i,
.msl_texture = i,
@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
}
}
if (!m_features.framebuffer_fetch)
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
{
const spvc_msl_resource_binding rb = {
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
return {};
}
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
const spvc_msl_resource_binding rb = {
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
}

View File

@ -160,6 +160,9 @@ public:
// Multiple textures, 128 byte UBO via push constants.
MultiTextureAndPushConstants,
// 128 byte UBO via push constants, 1 texture, compute shader.
ComputeSingleTextureAndPushConstants,
MaxCount
};
@ -416,6 +419,12 @@ public:
u32 GetRenderTargetCount() const;
};
// Configuration used to create a compute pipeline. Unlike GraphicsConfig there is
// no vertex/render-target state; only the binding layout and the shader itself.
struct ComputeConfig
{
  Layout layout;             // binding layout; expected to be a compute layout (e.g. ComputeSingleTextureAndPushConstants)
  GPUShader* compute_shader; // non-owning pointer to the compute-stage shader
};
GPUPipeline();
virtual ~GPUPipeline();
@ -501,9 +510,10 @@ public:
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
FEATURE_MASK_COMPUTE_SHADERS = (1 << 5),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
};
enum class DrawBarrier : u32
@ -532,6 +542,7 @@ public:
bool texture_buffers_emulated_with_ssbo : 1;
bool feedback_loops : 1;
bool geometry_shaders : 1;
bool compute_shaders : 1;
bool partial_msaa_resolve : 1;
bool memory_import : 1;
bool explicit_present : 1;
@ -625,11 +636,20 @@ public:
0, // SingleTextureBufferAndPushConstants
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
1, // ComputeSingleTextureAndPushConstants
};
return counts[static_cast<u8>(layout)];
}
/// Returns the number of thread groups to dispatch for a given total count and local size.
/// Each component is rounded up so that a partially-filled group still gets dispatched.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
                                                            u32 local_size_y, u32 local_size_z)
{
  // Ceiling division per axis: (count + local - 1) / local.
  const u32 groups_x = (count_x + local_size_x - 1) / local_size_x;
  const u32 groups_y = (count_y + local_size_y - 1) / local_size_y;
  const u32 groups_z = (count_z + local_size_z - 1) / local_size_z;
  return std::make_tuple(groups_x, groups_y, groups_z);
}
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
@ -638,10 +658,6 @@ public:
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
// ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); }
// ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); }
// ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
// ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; }
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
@ -712,6 +728,8 @@ public:
Error* error = nullptr, const char* entry_point = "main");
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
Error* error = nullptr) = 0;
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
Error* error = nullptr) = 0;
/// Debug messaging.
virtual void PushDebugGroup(const char* name) = 0;
@ -753,6 +771,7 @@ public:
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
virtual void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) = 0;
/// Returns false if the window was completely occluded.
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;

View File

@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
}
}
std::unique_ptr<GPUPipeline> OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  // Compute pipelines are not implemented in the GL backend; m_features.compute_shaders
  // is also forced off in CheckFeatures(), so this should not normally be reached.
  // Populate the caller's Error object as well as logging, so callers that surface
  // 'error' to the user don't receive a null pipeline with no explanation.
  ERROR_LOG("Compute shaders are not yet supported.");
  Error::SetStringFmt(error, "Compute shaders are not yet supported.");
  return {};
}
void OpenGLDevice::PushDebugGroup(const char* name)
{
#ifdef _DEBUG
@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
m_features.geometry_shaders =
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
m_features.compute_shaders = false;
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
@ -1078,6 +1085,11 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
Panic("Barriers are not supported");
}
// Compute dispatch entry point. The GL backend reports m_features.compute_shaders = false,
// so reaching this function indicates a caller bug rather than a runtime condition.
void OpenGLDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
  Panic("Compute shaders are not supported");
}
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
u32* map_base_vertex)
{

View File

@ -77,6 +77,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -100,6 +101,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;

View File

@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear()
m_smap_constants = {};
}
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/,
bool clear /*= true*/)
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear,
Error* error)
{
VkPipeline pipeline;
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
SetErrorObject(error, "vkCreateComputePipelines() failed: ", res);
return VK_NULL_HANDLE;
}

View File

@ -197,7 +197,7 @@ public:
void Clear();
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error);
void SetShader(VkShaderModule module, const char* entry_point);

View File

@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
WARNING_LOG("Emulating texture buffers with SSBOs.");
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
@ -2802,7 +2803,8 @@ bool VulkanDevice::CreatePipelineLayouts()
}
{
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
@ -2822,7 +2824,8 @@ bool VulkanDevice::CreatePipelineLayouts()
if (m_optional_extensions.vk_khr_push_descriptor)
dslb.SetPushFlag();
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
@ -2837,14 +2840,13 @@ bool VulkanDevice::CreatePipelineLayouts()
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
}
if (m_features.raster_order_views)
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout");
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
}
if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout");
for (u32 type = 0; type < 3; type++)
{
@ -2860,7 +2862,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
@ -2873,7 +2875,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
@ -2887,7 +2889,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
@ -2901,7 +2903,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
@ -2915,13 +2917,24 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
}
}
{
VkPipelineLayout& pl =
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
}
return true;
}
@ -2942,7 +2955,7 @@ void VulkanDevice::DestroyPipelineLayouts()
l = VK_NULL_HANDLE;
}
};
destroy_dsl(m_rov_ds_layout);
destroy_dsl(m_image_ds_layout);
destroy_dsl(m_feedback_loop_ds_layout);
destroy_dsl(m_multi_texture_ds_layout);
destroy_dsl(m_single_texture_buffer_ds_layout);
@ -3674,12 +3687,56 @@ void VulkanDevice::PreDrawCheck()
}
}
// Prepares all bound state for a vkCmdDispatch(): image layouts for sampled textures
// and render targets, initial pipeline state on a fresh command buffer, and any dirty
// descriptor sets. Compute analogue of PreDrawCheck().
void VulkanDevice::PreDispatchCheck()
{
  // All textures should be in shader read only optimal already, but just in case..
  const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 i = 0; i < num_textures; i++)
  {
    if (m_current_textures[i])
      m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
  }
  // Binding as image, but we still need to clear it.
  // A pending clear must be committed before the transition, since after this the
  // target is accessed as a read/write storage image, not as a render target.
  for (u32 i = 0; i < m_num_current_render_targets; i++)
  {
    VulkanTexture* rt = m_current_render_targets[i];
    if (rt->GetState() == GPUTexture::State::Cleared)
      rt->CommitClear(m_current_command_buffer);
    rt->SetState(GPUTexture::State::Dirty);
    rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
    rt->SetUseFenceCounter(GetCurrentFenceCounter());
  }
  // If this is a new command buffer, bind the pipeline and such.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    SetInitialPipelineState();
  DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
  // Only update the input-attachment binding when special render pass flags are active.
  const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
  const u32 dirty = m_dirty_flags & update_mask;
  m_dirty_flags = m_dirty_flags & ~update_mask;
  if (dirty != 0)
  {
    if (!UpdateDescriptorSets(dirty))
    {
      // Descriptor pool exhausted: submit the current work, then retry against the
      // fresh command buffer (recursion re-runs the full state setup above).
      SubmitCommandBuffer(false, "out of descriptor sets");
      PreDispatchCheck();
      return;
    }
  }
}
template<GPUPipeline::Layout layout>
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
[[maybe_unused]] bool new_dynamic_offsets = false;
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
constexpr VkPipelineBindPoint vk_bind_point =
((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS :
VK_PIPELINE_BIND_POINT_COMPUTE);
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
std::array<VkDescriptorSet, 3> ds;
u32 first_ds = 0;
u32 num_ds = 0;
@ -3700,7 +3757,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
{
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
@ -3727,7 +3785,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set);
if (num_ds == 0)
return true;
}
@ -3757,7 +3815,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout);
if (ids == VK_NULL_HANDLE)
return false;
@ -3792,8 +3850,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
DebugAssert(num_ds > 0);
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(),
static_cast<u32>(new_dynamic_offsets),
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
return true;
@ -3818,6 +3876,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
default:
UnreachableCode();
}
@ -3911,3 +3972,10 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
DefaultCaseIsUnreachable();
}
}
// Records a compute dispatch into the current command buffer. State (layouts,
// descriptor sets, pipeline) is validated/bound by PreDispatchCheck() first.
void VulkanDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
  PreDispatchCheck();
  s_stats.num_draws++; // dispatches are counted in the same stat as draws
  vkCmdDispatch(GetCurrentCommandBuffer(), thread_groups_x, thread_groups_y, thread_groups_z);
}

View File

@ -113,6 +113,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -136,6 +137,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@ -373,6 +375,7 @@ private:
VkPipelineLayout GetCurrentVkPipelineLayout() const;
void SetInitialPipelineState();
void PreDrawCheck();
void PreDispatchCheck();
template<GPUPipeline::Layout layout>
bool UpdateDescriptorSetsForLayout(u32 dirty);
@ -435,7 +438,7 @@ private:
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE;
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
static_cast<size_t>(PipelineLayoutType::MaxCount)>
m_pipeline_layouts = {};

View File

@ -275,3 +275,16 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
return std::unique_ptr<GPUPipeline>(
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
}
// Creates a compute pipeline from the shader and binding layout in 'config'.
// On failure, 'error' receives the reason and null is returned.
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  const VkShaderModule shader_module = static_cast<const VulkanShader*>(config.compute_shader)->GetModule();
  const VkPipelineLayout pipeline_layout = m_pipeline_layouts[0][static_cast<size_t>(config.layout)];

  Vulkan::ComputePipelineBuilder builder;
  builder.SetShader(shader_module, "main");
  builder.SetPipelineLayout(pipeline_layout);

  const VkPipeline vk_pipeline = builder.Create(m_device, m_pipeline_cache, false, error);
  if (vk_pipeline == VK_NULL_HANDLE)
    return {};

  // Compute pipelines carry no primitive topology (vertices-per-primitive = 0) and no
  // render pass flags.
  return std::unique_ptr<GPUPipeline>(
    new VulkanPipeline(vk_pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags));
}