From c21ea3c85b76cf1ff8d16f8622021da9fbdbec3b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 20 Nov 2024 20:44:37 +1000 Subject: [PATCH] GPUDevice: Add compute shader support --- src/util/d3d11_device.cpp | 79 ++++++++++------ src/util/d3d11_device.h | 5 ++ src/util/d3d11_pipeline.cpp | 170 +++++++++++++++++++++++++---------- src/util/d3d11_pipeline.h | 12 ++- src/util/d3d12_builders.h | 2 + src/util/d3d12_device.cpp | 123 +++++++++++++++++++++++-- src/util/d3d12_device.h | 4 + src/util/d3d12_pipeline.cpp | 55 ++++++++++++ src/util/d3d12_pipeline.h | 1 + src/util/gpu_device.cpp | 68 ++++++++++++-- src/util/gpu_device.h | 33 +++++-- src/util/opengl_device.cpp | 12 +++ src/util/opengl_device.h | 2 + src/util/vulkan_builders.cpp | 5 +- src/util/vulkan_builders.h | 2 +- src/util/vulkan_device.cpp | 108 +++++++++++++++++----- src/util/vulkan_device.h | 5 +- src/util/vulkan_pipeline.cpp | 13 +++ 18 files changed, 577 insertions(+), 122 deletions(-) diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index a8bcfc6a8..2c4e71d26 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) m_features.texture_buffers_emulated_with_ssbo = false; m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); + m_features.compute_shaders = + (!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0); m_features.partial_msaa_resolve = false; m_features.memory_import = false; m_features.explicit_present = false; @@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size) m_uniform_buffer.Unmap(m_context.Get(), req_size); s_stats.buffer_streamed += data_size; - if (m_uniform_buffer.IsUsingMapNoOverwrite()) - { - const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u; - const UINT num_constants = req_size / 16u; - m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - } - else - { - DebugAssert(res.index_aligned == 0); - m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); - } + BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size); } void* D3D11Device::MapUniformBuffer(u32 size) @@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size) m_uniform_buffer.Unmap(m_context.Get(), req_size); s_stats.buffer_streamed += size; + BindUniformBuffer(pos, req_size); +} + +void D3D11Device::BindUniformBuffer(u32 offset, u32 size) +{ if (m_uniform_buffer.IsUsingMapNoOverwrite()) { - const UINT first_constant = pos / 16u; - const UINT num_constants = req_size / 16u; - m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + const UINT first_constant = offset / 16u; + const UINT num_constants = size / 16u; + if (m_current_compute_shader) + { + m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + } + else + { + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + 
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + } } else { - DebugAssert(pos == 0); - m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + DebugAssert(offset == 0); + if (m_current_compute_shader) + { + m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + } + else + { + m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + } } } @@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu for (u32 i = 0; i < m_num_current_render_targets; i++) uavs[i] = m_current_render_targets[i]->GetD3DUAV(); - m_context->OMSetRenderTargetsAndUnorderedAccessViews( - 0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0, - m_num_current_render_targets, uavs.data(), nullptr); + if (!m_current_compute_shader) + { + m_context->OMSetRenderTargetsAndUnorderedAccessViews( + 0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0, + m_num_current_render_targets, uavs.data(), nullptr); + } + else + { + m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr); + } } else { @@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s { m_current_textures[slot] = T; m_context->PSSetShaderResources(slot, 1, &T); + if (m_current_compute_shader) + m_context->CSSetShaderResources(slot, 1, &T); } if (m_current_samplers[slot] != S) { m_current_samplers[slot] = S; m_context->PSSetSamplers(slot, 1, &S); + if (m_current_compute_shader) + m_context->CSSetSamplers(slot, 1, &S); } } @@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) if (m_current_textures[slot] != B) { m_current_textures[slot] = B; + + // Compute doesn't support texture buffers, yet... 
m_context->PSSetShaderResources(slot, 1, &B); } } @@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc) void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) { - DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped()); + DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader); s_stats.num_draws++; m_context->Draw(vertex_count, base_vertex); } void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { - DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped()); + DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader); s_stats.num_draws++; m_context->DrawIndexed(index_count, base_index, base_vertex); } @@ -1129,3 +1151,10 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba { Panic("Barriers are not supported"); } + +void D3D11Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) +{ + DebugAssert(m_current_compute_shader); + s_stats.num_draws++; + m_context->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z); +} diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index 2fbb2dfa4..721b26f79 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -75,6 +75,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -98,6 +99,7 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -140,6 +142,8 @@ private: bool CreateBuffers(); void DestroyBuffers(); + void BindUniformBuffer(u32 offset, u32 size); + void UnbindComputePipeline(); bool IsRenderTargetBound(const D3D11Texture* tex) const; @@ -180,6 +184,7 @@ private: ID3D11VertexShader* m_current_vertex_shader = nullptr; ID3D11GeometryShader* m_current_geometry_shader = nullptr; ID3D11PixelShader* m_current_pixel_shader = nullptr; + ID3D11ComputeShader* m_current_compute_shader = nullptr; ID3D11RasterizerState* m_current_rasterizer_state = nullptr; ID3D11DepthStencilState* m_current_depth_state = nullptr; ID3D11BlendState* m_current_blend_state = nullptr; diff --git a/src/util/d3d11_pipeline.cpp b/src/util/d3d11_pipeline.cpp index 0c2301cec..b0d4dd681 100644 --- a/src/util/d3d11_pipeline.cpp +++ b/src/util/d3d11_pipeline.cpp @@ -3,6 +3,7 @@ #include "d3d11_pipeline.h" #include "d3d11_device.h" +#include "d3d11_texture.h" #include "d3d_common.h" #include "common/assert.h" @@ -121,10 +122,10 @@ std::unique_ptr D3D11Device::CreateShaderFromSource(GPUShaderStage st D3D11Pipeline::D3D11Pipeline(ComPtr rs, ComPtr ds, ComPtr bs, ComPtr il, ComPtr vs, - ComPtr gs, ComPtr ps, + ComPtr gs, ComPtr ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor) : m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)), - 
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride), + m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride), m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor)) { } @@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c return h; } -D3D11Device::ComPtr D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error) +D3D11Device::ComPtr D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, + Error* error) { ComPtr dbs; @@ -365,69 +367,124 @@ std::unique_ptr D3D11Device::CreatePipeline(const GPUPipeline::Grap primitives[static_cast(config.primitive)], vertex_stride, config.blend.constant)); } +std::unique_ptr D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + if (!config.compute_shader) [[unlikely]] + { + Error::SetStringView(error, "Missing compute shader."); + return {}; + } + + return std::unique_ptr( + new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + static_cast(config.compute_shader)->GetComputeShader(), + D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0)); +} + void D3D11Device::SetPipeline(GPUPipeline* pipeline) { if (m_current_pipeline == pipeline) return; + const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline(); D3D11Pipeline* const PL = static_cast(pipeline); m_current_pipeline = PL; - if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il) + if (!PL->IsComputePipeline()) { - m_current_input_layout = il; - m_context->IASetInputLayout(il); - } + if (was_compute) + UnbindComputePipeline(); - if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride) - { - const UINT offset = 0; - m_current_vertex_stride = PL->GetVertexStride(); - m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset); - } + if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il) + { + m_current_input_layout = il; + m_context->IASetInputLayout(il); + } - if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology) - { - m_current_primitive_topology = topology; - m_context->IASetPrimitiveTopology(topology); - } + if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride) + { + const UINT offset = 0; + m_current_vertex_stride = PL->GetVertexStride(); + m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset); + } - if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs) - { - m_current_vertex_shader = vs; - m_context->VSSetShader(vs, nullptr, 0); - } + if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology) + { + m_current_primitive_topology = topology; + m_context->IASetPrimitiveTopology(topology); + } - if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs) - { - m_current_geometry_shader = gs; - m_context->GSSetShader(gs, nullptr, 0); - } + if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs) + { + m_current_vertex_shader = vs; + m_context->VSSetShader(vs, nullptr, 0); + } - if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps) - { - m_current_pixel_shader = ps; - 
m_context->PSSetShader(ps, nullptr, 0); - } + if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs) + { + m_current_geometry_shader = gs; + m_context->GSSetShader(gs, nullptr, 0); + } - if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs) - { - m_current_rasterizer_state = rs; - m_context->RSSetState(rs); - } + if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps) + { + m_current_pixel_shader = ps; + m_context->PSSetShader(ps, nullptr, 0); + } - if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds) - { - m_current_depth_state = ds; - m_context->OMSetDepthStencilState(ds, 0); - } + if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs) + { + m_current_rasterizer_state = rs; + m_context->RSSetState(rs); + } - if (ID3D11BlendState* bs = PL->GetBlendState(); - m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor()) + if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds) + { + m_current_depth_state = ds; + m_context->OMSetDepthStencilState(ds, 0); + } + + if (ID3D11BlendState* bs = PL->GetBlendState(); + m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor()) + { + m_current_blend_state = bs; + m_current_blend_factor = PL->GetBlendFactor(); + m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu); + } + } + else { - m_current_blend_state = bs; - m_current_blend_factor = PL->GetBlendFactor(); - m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu); + if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader) + { + m_current_compute_shader = cs; + m_context->CSSetShader(cs, nullptr, 0); + } + + if (!was_compute) + { + // need to bind all SRVs/samplers + u32 count; + for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++) + { + if (!m_current_textures[count]) + break; + } + if (count > 0) + { + m_context->CSSetShaderResources(0, count, m_current_textures.data()); + m_context->CSSetSamplers(0, count, m_current_samplers.data()); + } + + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) + { + ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS]; + for (u32 i = 0; i < m_num_current_render_targets; i++) + uavs[i] = m_current_render_targets[i]->GetD3DUAV(); + + m_context->OMSetRenderTargets(0, nullptr, nullptr); + m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr); + } + } } } @@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl) if (m_current_pipeline != pl) return; + if (pl->IsComputePipeline()) + UnbindComputePipeline(); + // Let the runtime deal with the dead objects... 
m_current_pipeline = nullptr; } + +void D3D11Device::UnbindComputePipeline() +{ + m_current_compute_shader = nullptr; + + ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {}; + ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {}; + ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {}; + m_context->CSSetShader(nullptr, nullptr, 0); + m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs); + m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers); + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) + m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr); +} diff --git a/src/util/d3d11_pipeline.h b/src/util/d3d11_pipeline.h index 88e825750..c3d58d3b2 100644 --- a/src/util/d3d11_pipeline.h +++ b/src/util/d3d11_pipeline.h @@ -51,13 +51,18 @@ public: void SetDebugName(std::string_view name) override; + ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; } ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); } ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); } ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); } ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); } ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); } ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); } - ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); } + ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast(m_ps_or_cs.Get()); } + ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const + { + return static_cast(m_ps_or_cs.Get()); + } ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; } ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; } ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; } @@ -66,7 +71,8 @@ public: private: D3D11Pipeline(ComPtr rs, ComPtr ds, ComPtr bs, ComPtr il, ComPtr vs, ComPtr gs, - ComPtr ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor); + ComPtr ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, + u32 blend_factor); ComPtr m_rs; ComPtr m_ds; @@ -74,7 +80,7 @@ private: ComPtr m_il; ComPtr m_vs; ComPtr m_gs; - ComPtr m_ps; + ComPtr m_ps_or_cs; D3D11_PRIMITIVE_TOPOLOGY m_topology; u32 m_vertex_stride; u32 m_blend_factor; diff --git a/src/util/d3d12_builders.h b/src/util/d3d12_builders.h index ae2970716..9dbffcf77 100644 --- a/src/util/d3d12_builders.h +++ b/src/util/d3d12_builders.h @@ -115,6 +115,8 @@ public: ComputePipelineBuilder(); ~ComputePipelineBuilder() = default; + ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; } + void Clear(); Microsoft::WRL::ComPtr Create(ID3D12Device* device, Error* error, bool clear); diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index cd17a22e4..6610ebf1f 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab m_features.texture_buffers_emulated_with_ssbo = false; m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); + m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = false; 
m_features.explicit_present = true; @@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) 1, // SingleTextureBufferAndPushConstants 0, // MultiTextureAndUBO 2, // MultiTextureAndPushConstants + 2, // ComputeSingleTextureAndPushConstants }; DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); @@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) const u32 push_param = push_parameters[static_cast(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature()); - GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0); + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + if (!IsUsingComputeRootSignature()) + cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0); + else + cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0); } void* D3D12Device::MapUniformBuffer(u32 size) @@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error) } } + { + auto& rs = m_root_signatures[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)]; + + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL); + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout"); + } + return true; } @@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass() rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); rt->SetUseFenceValue(GetCurrentFenceValue()); rt->CommitClear(cmdlist); + rt->SetState(GPUTexture::State::Dirty); } } if (m_current_depth_target) @@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck() BeginRenderPass(); } +void D3D12Device::PreDispatchCheck() +{ + if (InRenderPass()) + EndRenderPass(); + + // Transition images. + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) + { + // Still need to clear the RTs. + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + D3D12Texture* const rt = m_current_render_targets[i]; + rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + rt->SetUseFenceValue(GetCurrentFenceValue()); + rt->CommitClear(cmdlist); + rt->SetState(GPUTexture::State::Dirty); + } + } + + // If this is a new command buffer, bind the pipeline and such. + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + SetInitialPipelineState(); + + // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants. 
+ DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); + const u32 dirty = std::exchange(m_dirty_flags, 0); + if (dirty != 0) + { + if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) + { + UpdateRootSignature(); + if (!UpdateRootParameters(dirty)) + { + SubmitCommandList(false, "out of descriptors"); + PreDispatchCheck(); + return; + } + } + else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS)) + { + if (!UpdateRootParameters(dirty)) + { + SubmitCommandList(false, "out of descriptors"); + PreDispatchCheck(); + return; + } + } + } +} + bool D3D12Device::IsUsingROVRootSignature() const { return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0); } +bool D3D12Device::IsUsingComputeRootSignature() const +{ + return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants); +} + void D3D12Device::UpdateRootSignature() { - GetCommandList()->SetGraphicsRootSignature( - m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast(m_current_pipeline_layout)].Get()); + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + if (!IsUsingComputeRootSignature()) + { + cmdlist->SetGraphicsRootSignature( + m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast(m_current_pipeline_layout)].Get()); + } + else + { + cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast(m_current_pipeline_layout)].Get()); + } } template @@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); + if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); + else + cmdlist->SetComputeRootDescriptorTable(0, gpu_handle); } if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0) @@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) return false; } - cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); + if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); + else + cmdlist->SetComputeRootDescriptorTable(1, gpu_handle); } if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) @@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) 1 : ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 
3 : 2); - cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); + if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); + else + cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle); } return true; @@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty) case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateParametersForLayout(dirty); + case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: + return UpdateParametersForLayout(dirty); + default: UnreachableCode(); } @@ -2331,3 +2435,10 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba { Panic("Barriers are not supported"); } + +void D3D12Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) +{ + PreDispatchCheck(); + s_stats.num_draws++; + GetCommandList()->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z); +} diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index e20bd525f..96be99df0 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -96,6 +96,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -119,6 +120,7 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -275,8 +277,10 @@ private: ID3D12RootSignature* GetCurrentRootSignature() const; void SetInitialPipelineState(); void PreDrawCheck(); + void PreDispatchCheck(); bool IsUsingROVRootSignature() const; + bool IsUsingComputeRootSignature() const; void UpdateRootSignature(); template bool UpdateParametersForLayout(u32 dirty); diff --git a/src/util/d3d12_pipeline.cpp b/src/util/d3d12_pipeline.cpp index c25a67c5c..4b78c2aff 100644 --- a/src/util/d3d12_pipeline.cpp +++ b/src/util/d3d12_pipeline.cpp @@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config) return SHA1Digest::DigestToString(digest); } +std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config) +{ + SHA1Digest hash; + hash.Update(&config.layout, sizeof(config.layout)); + if (const D3D12Shader* shader = static_cast(config.compute_shader)) + hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize()); + + u8 digest[SHA1Digest::DIGEST_SIZE]; + hash.Final(digest); + return SHA1Digest::DigestToString(digest); +} + std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) { static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = @@ -274,3 +286,46 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap pipeline, config.layout, primitives[static_cast(config.primitive)], config.input_layout.vertex_attributes.empty() ? 
0 : config.input_layout.vertex_stride, config.blend.constant)); } + +std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + D3D12::ComputePipelineBuilder cpb; + cpb.SetRootSignature(m_root_signatures[0][static_cast(config.layout)].Get()); + cpb.SetShader(static_cast(config.compute_shader)->GetBytecodeData(), + static_cast(config.compute_shader)->GetBytecodeSize()); + + ComPtr pipeline; + if (m_pipeline_library) + { + const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config)); + HRESULT hr = + m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf())); + if (FAILED(hr)) + { + // E_INVALIDARG = not found. + if (hr != E_INVALIDARG) + ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast(hr)); + + // Need to create it normally. + pipeline = cpb.Create(m_device.Get(), error, false); + + // Store if it wasn't an OOM or something else. + if (pipeline && hr == E_INVALIDARG) + { + hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get()); + if (FAILED(hr)) + ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast(hr)); + } + } + } + else + { + pipeline = cpb.Create(m_device.Get(), error, false); + } + + if (!pipeline) + return {}; + + return std::unique_ptr( + new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0)); +} diff --git a/src/util/d3d12_pipeline.h b/src/util/d3d12_pipeline.h index bca9494fa..e2f83d14f 100644 --- a/src/util/d3d12_pipeline.h +++ b/src/util/d3d12_pipeline.h @@ -51,6 +51,7 @@ public: void SetDebugName(std::string_view name) override; static std::string GetPipelineName(const GraphicsConfig& config); + static std::string GetPipelineName(const ComputeConfig& config); private: D3D12Pipeline(Microsoft::WRL::ComPtr pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology, diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index d9554289c..6a4c3a6dd 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP // Need to know if there's UBOs for mapping. 
const spvc_reflected_resource *ubos, *textures; - size_t ubos_count, textures_count; + size_t ubos_count, textures_count, images_count; if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos, &ubos_count)) != SPVC_SUCCESS || (sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, - &textures, &textures_count)) != SPVC_SUCCESS) + &textures, &textures_count)) != SPVC_SUCCESS || + (sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE, + &textures, &images_count)) != SPVC_SUCCESS) { Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast(sres)); return {}; @@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP [[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler); [[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0; [[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1; + [[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2; switch (target_language) { @@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP } } } + + if (stage == GPUShaderStage::Compute) + { + for (u32 i = 0; i < images_count; i++) + { + const spvc_hlsl_resource_binding rb = {.stage = execmodel, + .desc_set = IMAGE_DESCRIPTOR_SET, + .binding = i, + .cbv = {}, + .uav = {.register_space = 0, .register_binding = i}, + .srv = {}, + .sampler = {}}; + if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) + { + Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast(sres)); + return {}; + } + } + } } break; #endif @@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP return {}; } - if (stage == GPUShaderStage::Fragment) + const spvc_msl_resource_binding pc_rb = {.stage = execmodel, + .desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET, + .binding = SPVC_MSL_PUSH_CONSTANT_BINDING, + .msl_buffer = 0, + .msl_texture = 0, + .msl_sampler = 0}; + if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS) + { + Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}", + static_cast(sres)); + return {}; + } + + if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute) { for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { - const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment, - .desc_set = 1, + const spvc_msl_resource_binding rb = {.stage = execmodel, + .desc_set = TEXTURE_DESCRIPTOR_SET, .binding = i, .msl_buffer = i, .msl_texture = i, @@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span spirv, GP return {}; } } + } - if (!m_features.framebuffer_fetch) + if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch) + { + const spvc_msl_resource_binding rb = { + .stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS}; + + if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) + { + Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}", + static_cast(sres)); + return {}; + } + } + + if (stage == GPUShaderStage::Compute) + { + for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) { const spvc_msl_resource_binding rb = { - 
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS}; + .stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i}; if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) { - Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}", - static_cast(sres)); + Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast(sres)); return {}; } } diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 309b4db39..e29cedc31 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -160,6 +160,9 @@ public: // Multiple textures, 128 byte UBO via push constants. MultiTextureAndPushConstants, + // 128 byte UBO via push constants, 1 texture, compute shader. + ComputeSingleTextureAndPushConstants, + MaxCount }; @@ -416,6 +419,12 @@ public: u32 GetRenderTargetCount() const; }; + struct ComputeConfig + { + Layout layout; + GPUShader* compute_shader; + }; + GPUPipeline(); virtual ~GPUPipeline(); @@ -501,9 +510,10 @@ public: FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2), FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3), FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4), - FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5), - FEATURE_MASK_MEMORY_IMPORT = (1 << 6), - FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7), + FEATURE_MASK_COMPUTE_SHADERS = (1 << 5), + FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6), + FEATURE_MASK_MEMORY_IMPORT = (1 << 7), + FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8), }; enum class DrawBarrier : u32 @@ -532,6 +542,7 @@ public: bool texture_buffers_emulated_with_ssbo : 1; bool feedback_loops : 1; bool geometry_shaders : 1; + bool compute_shaders : 1; bool partial_msaa_resolve : 1; bool memory_import : 1; bool explicit_present : 1; @@ -625,11 +636,20 @@ public: 0, // SingleTextureBufferAndPushConstants MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants + 1, // ComputeSingleTextureAndPushConstants }; return counts[static_cast(layout)]; } + /// Returns the number of thread groups to dispatch for a given total count and local size. 
+ static constexpr std::tuple GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x, + u32 local_size_y, u32 local_size_z) + { + return std::make_tuple((count_x + (local_size_x - 1)) / local_size_x, (count_y + (local_size_y - 1)) / local_size_y, + (count_z + (local_size_z - 1)) / local_size_z); + } + ALWAYS_INLINE const Features& GetFeatures() const { return m_features; } ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; } ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; } @@ -638,10 +658,6 @@ public: ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); } ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast(m_main_swap_chain); } - // ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); } - // ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); } - // ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; } - // ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; } ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); } ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); } @@ -712,6 +728,8 @@ public: Error* error = nullptr, const char* entry_point = "main"); virtual std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error = nullptr) = 0; + virtual std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, + Error* error = nullptr) = 0; /// Debug messaging. virtual void PushDebugGroup(const char* name) = 0; @@ -753,6 +771,7 @@ public: virtual void Draw(u32 vertex_count, u32 base_vertex) = 0; virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0; virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0; + virtual void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) = 0; /// Returns false if the window was completely occluded. 
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index e929da63d..e085aa9de 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t) } } +std::unique_ptr OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + ERROR_LOG("Compute shaders are not yet supported."); + return {}; +} + void OpenGLDevice::PushDebugGroup(const char* name) { #ifdef _DEBUG @@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2); + m_features.compute_shaders = false; m_features.gpu_timing = !(m_gl_context->IsGLES() && (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)); @@ -1078,6 +1085,11 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b Panic("Barriers are not supported"); } +void OpenGLDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) +{ + Panic("Compute shaders are not supported"); +} + void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex) { diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index e499c5931..eda03787d 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -77,6 +77,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -100,6 +101,7 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override; PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override; void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override; diff --git a/src/util/vulkan_builders.cpp b/src/util/vulkan_builders.cpp index d9a64a356..6c6251acb 100644 --- a/src/util/vulkan_builders.cpp +++ b/src/util/vulkan_builders.cpp @@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear() m_smap_constants = {}; } -VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/, - bool clear /*= true*/) +VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, + Error* error) { VkPipeline pipeline; VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: "); + SetErrorObject(error, "vkCreateComputePipelines() failed: ", res); return VK_NULL_HANDLE; } diff --git a/src/util/vulkan_builders.h b/src/util/vulkan_builders.h index f65f2e1aa..760caecee 100644 --- a/src/util/vulkan_builders.h +++ b/src/util/vulkan_builders.h @@ -197,7 +197,7 @@ public: 
void Clear(); - VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true); + VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error); void SetShader(VkShaderModule module, const char* entry_point); diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 924ffde3f..97042cbec 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe WARNING_LOG("Emulating texture buffers with SSBOs."); m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader; + m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host; @@ -2802,7 +2803,8 @@ bool VulkanDevice::CreatePipelineLayouts() } { - dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, + VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT); if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout"); @@ -2822,7 +2824,8 @@ bool VulkanDevice::CreatePipelineLayouts() if (m_optional_extensions.vk_khr_push_descriptor) dslb.SetPushFlag(); for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) - dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, + VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT); if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout"); @@ -2837,14 +2840,13 @@ bool VulkanDevice::CreatePipelineLayouts() Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout"); } - if (m_features.raster_order_views) + for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) { - for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) - dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) - return false; - Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout"); + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT); } + if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout"); for (u32 type = 0; type < 3; type++) { @@ -2860,7 +2862,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); @@ -2873,7 +2875,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); 
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; @@ -2887,7 +2889,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; @@ -2901,7 +2903,7 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout"); @@ -2915,13 +2917,24 @@ bool VulkanDevice::CreatePipelineLayouts() if (feedback_loop) plb.AddDescriptorSet(m_feedback_loop_ds_layout); else if (rov) - plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout"); } } + { + VkPipelineLayout& pl = + m_pipeline_layouts[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)]; + plb.AddDescriptorSet(m_single_texture_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); + plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout"); + } + return true; } @@ -2942,7 +2955,7 @@ void VulkanDevice::DestroyPipelineLayouts() l = VK_NULL_HANDLE; } }; - destroy_dsl(m_rov_ds_layout); + destroy_dsl(m_image_ds_layout); destroy_dsl(m_feedback_loop_ds_layout); destroy_dsl(m_multi_texture_ds_layout); destroy_dsl(m_single_texture_buffer_ds_layout); @@ -3674,12 +3687,56 @@ void VulkanDevice::PreDrawCheck() } } +void VulkanDevice::PreDispatchCheck() +{ + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + } + + // Binding as image, but we still need to clear it. + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + VulkanTexture* rt = m_current_render_targets[i]; + if (rt->GetState() == GPUTexture::State::Cleared) + rt->CommitClear(m_current_command_buffer); + rt->SetState(GPUTexture::State::Dirty); + rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage); + rt->SetUseFenceCounter(GetCurrentFenceCounter()); + } + + // If this is a new command buffer, bind the pipeline and such. + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + SetInitialPipelineState(); + + DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); + const u32 update_mask = (m_current_render_pass_flags ? 
~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT); + const u32 dirty = m_dirty_flags & update_mask; + m_dirty_flags = m_dirty_flags & ~update_mask; + + if (dirty != 0) + { + if (!UpdateDescriptorSets(dirty)) + { + SubmitCommandBuffer(false, "out of descriptor sets"); + PreDispatchCheck(); + return; + } + } +} + template bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { [[maybe_unused]] bool new_dynamic_offsets = false; - VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout(); + constexpr VkPipelineBindPoint vk_bind_point = + ((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS : + VK_PIPELINE_BIND_POINT_COMPUTE); + const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout(); std::array ds; u32 first_ds = 0; u32 num_ds = 0; @@ -3700,7 +3757,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || - layout == GPUPipeline::Layout::SingleTextureAndPushConstants) + layout == GPUPipeline::Layout::SingleTextureAndPushConstants || + layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) { VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get(); DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE); @@ -3727,7 +3785,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0; - dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set); + dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set); if (num_ds == 0) return true; } @@ -3757,7 +3815,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) { - VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout); + VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout); if (ids == VK_NULL_HANDLE) return false; @@ -3792,8 +3850,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } DebugAssert(num_ds > 0); - vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds, - num_ds, ds.data(), static_cast(new_dynamic_offsets), + vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(), + static_cast(new_dynamic_offsets), new_dynamic_offsets ? 
&m_uniform_buffer_position : nullptr); return true; @@ -3818,6 +3876,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty) case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateDescriptorSetsForLayout(dirty); + case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: + return UpdateDescriptorSetsForLayout(dirty); + default: UnreachableCode(); } @@ -3911,3 +3972,10 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b DefaultCaseIsUnreachable(); } } + +void VulkanDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) +{ + PreDispatchCheck(); + s_stats.num_draws++; + vkCmdDispatch(GetCurrentCommandBuffer(), thread_groups_x, thread_groups_y, thread_groups_z); +} diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index f2e870f93..6ac6c7ff5 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -113,6 +113,7 @@ public: std::string_view source, const char* entry_point, DynamicHeapArray* out_binary, Error* error) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; + std::unique_ptr CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override; void PushDebugGroup(const char* name) override; void PopDebugGroup() override; @@ -136,6 +137,7 @@ public: void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; + void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -373,6 +375,7 @@ private: VkPipelineLayout GetCurrentVkPipelineLayout() const; void SetInitialPipelineState(); void PreDrawCheck(); + void PreDispatchCheck(); template bool UpdateDescriptorSetsForLayout(u32 dirty); @@ -435,7 +438,7 @@ private: VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE; DimensionalArray(GPUPipeline::Layout::MaxCount), static_cast(PipelineLayoutType::MaxCount)> m_pipeline_layouts = {}; diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp index 52db0d766..a6d801c77 100644 --- a/src/util/vulkan_pipeline.cpp +++ b/src/util/vulkan_pipeline.cpp @@ -275,3 +275,16 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra return std::unique_ptr( new VulkanPipeline(pipeline, config.layout, static_cast(vertices_per_primitive), config.render_pass_flags)); } + +std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) +{ + Vulkan::ComputePipelineBuilder cpb; + cpb.SetShader(static_cast(config.compute_shader)->GetModule(), "main"); + cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast(config.layout)]); + + const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error); + if (!pipeline) + return {}; + + return std::unique_ptr(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags)); +}
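
Usage note (illustrative only, not part of the patch): the sketch below shows how a caller might exercise the compute path introduced here, wiring together GPUPipeline::ComputeConfig, the ComputeSingleTextureAndPushConstants layout, BindRenderTargetsAsImages, GetDispatchCount() and Dispatch(). The helper name DispatchExample, the 8x8 local workgroup size, the push-constant contents, the texture accessors, and the assumption that `cs` was already compiled as a compute shader and `rt` created with storage-image/UAV usage are all hypothetical; only the GPUDevice/GPUPipeline entry points named in the diff above are taken from the patch.

// Illustrative sketch, assuming the declarations added to gpu_device.h by this patch.
bool DispatchExample(GPUDevice* device, GPUShader* cs, GPUTexture* input, GPUTexture* rt)
{
  // Backends without an implementation (the OpenGL device in this patch) report
  // compute_shaders = false and panic in Dispatch(), so check the feature bit first.
  if (!device->GetFeatures().compute_shaders)
    return false;

  // Compute pipelines take only a layout and a compute shader.
  GPUPipeline::ComputeConfig config;
  config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
  config.compute_shader = cs;

  Error error;
  std::unique_ptr<GPUPipeline> pipeline = device->CreatePipeline(config, &error);
  if (!pipeline)
    return false;

  // Bind the destination as an image render target and the source as the single
  // sampled texture. The trailing flags argument to SetRenderTargets() is assumed
  // to accept GPUPipeline::BindRenderTargetsAsImages as in the graphics path.
  GPUTexture* rts[1] = {rt};
  device->SetRenderTargets(rts, 1, nullptr, GPUPipeline::BindRenderTargetsAsImages);
  device->SetTextureSampler(0, input, device->GetNearestSampler());
  device->SetPipeline(pipeline.get());

  // 128-byte push-constant style uniforms; the contents here are arbitrary example data.
  const u32 uniforms[4] = {rt->GetWidth(), rt->GetHeight(), 0u, 0u};
  device->PushUniformBuffer(uniforms, sizeof(uniforms));

  // Assumes the shader declares an 8x8x1 local workgroup size; GetDispatchCount()
  // rounds the per-axis thread counts up to whole thread groups.
  const auto [groups_x, groups_y, groups_z] =
    GPUDevice::GetDispatchCount(rt->GetWidth(), rt->GetHeight(), 1, 8, 8, 1);
  device->Dispatch(groups_x, groups_y, groups_z);
  return true;
}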