diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index c18b606d3..710de1e57 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -696,7 +696,14 @@ GPUDevice::PresentResult D3D11Device::BeginPresent(GPUSwapChain* swap_chain, u32 } m_context->ClearRenderTargetView(SC->GetRTV(), GSVector4::unorm8(clear_color).F32); - m_context->OMSetRenderTargets(1, SC->GetRTVArray(), nullptr); + + // Ugh, have to clear out any UAV bindings... + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages && !m_current_compute_shader) + m_context->OMSetRenderTargetsAndUnorderedAccessViews(1, SC->GetRTVArray(), nullptr, 0, 0, nullptr, nullptr); + else + m_context->OMSetRenderTargets(1, SC->GetRTVArray(), nullptr); + if (m_current_compute_shader) + UnbindComputePipeline(); s_stats.num_render_passes++; m_num_current_render_targets = 0; m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index b65dbf6ea..92344bc77 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1764,7 +1764,19 @@ bool D3D12Device::CreateRootSignatures(Error* error) } { - auto& rs = m_root_signatures[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)]; + auto& rs = m_root_signatures[0][static_cast(GPUPipeline::Layout::ComputeMultiTextureAndUBO)]; + + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Compute Multi Texture + UBO Pipeline Layout"); + } + + { + auto& rs = m_root_signatures[0][static_cast(GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)]; rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL); @@ -1772,7 +1784,7 @@ bool D3D12Device::CreateRootSignatures(Error* error) rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); if (!(rs = rsb.Create(error, true))) return false; - D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout"); + D3D12::SetObjectName(rs.Get(), "Compute Multi Texture Pipeline Layout"); } return true; @@ -2058,7 +2070,7 @@ bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const void D3D12Device::InvalidateCachedState() { - DebugAssert(!m_in_render_pass);; + DebugAssert(!m_in_render_pass); m_dirty_flags = ALL_DIRTY_STATE & ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS); } @@ -2405,7 +2417,7 @@ void D3D12Device::PreDispatchCheck() for (u32 i = 0; i < num_textures; i++) { if (m_current_textures[i]) - m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); } if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) @@ -2459,7 +2471,7 @@ bool D3D12Device::IsUsingROVRootSignature() const bool D3D12Device::IsUsingComputeRootSignature() const { - return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants); + return IsComputeLayout(m_current_pipeline_layout); } void D3D12Device::UpdateRootSignature() @@ -2481,10 +2493,17 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) { ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); - if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) + if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || + layout == GPUPipeline::Layout::MultiTextureAndUBO || + layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO) { if (dirty & DIRTY_FLAG_CONSTANT_BUFFER) - cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position); + { + if constexpr (!IsComputeLayout(layout)) + cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position); + else + cmdlist->SetComputeRootConstantBufferView(3, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position); + } } constexpr u32 num_textures = GetActiveTexturesForLayout(layout); @@ -2514,7 +2533,7 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + if constexpr (!IsComputeLayout(layout)) cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); else cmdlist->SetComputeRootDescriptorTable(0, gpu_handle); @@ -2535,7 +2554,7 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) return false; } - if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + if constexpr (!IsComputeLayout(layout)) cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); else cmdlist->SetComputeRootDescriptorTable(1, gpu_handle); @@ -2576,11 +2595,14 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); constexpr u32 rov_param = - (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ? - 1 : - ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 : - 2); - if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + IsComputeLayout(layout) ? + 2 : + ((layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ? + 1 : + ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? + 3 : + 2)); + if constexpr (!IsComputeLayout(layout)) cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); else cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle); @@ -2608,8 +2630,11 @@ bool D3D12Device::UpdateRootParameters(u32 dirty) case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateParametersForLayout(dirty); - case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: - return UpdateParametersForLayout(dirty); + case GPUPipeline::Layout::ComputeMultiTextureAndUBO: + return UpdateParametersForLayout(dirty); + + case GPUPipeline::Layout::ComputeMultiTextureAndPushConstants: + return UpdateParametersForLayout(dirty); default: UnreachableCode(); diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 1a587f24a..176c9be75 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -182,8 +182,11 @@ public: // Multiple textures, 128 byte UBO via push constants. MultiTextureAndPushConstants, - // 128 byte UBO via push constants, 1 texture, compute shader. - ComputeSingleTextureAndPushConstants, + // Multiple textures, 1 streamed UBO, compute shader. + ComputeMultiTextureAndUBO, + + // 128 byte UBO via push constants, multiple textures, compute shader. + ComputeMultiTextureAndPushConstants, MaxCount }; @@ -697,12 +700,19 @@ public: 0, // SingleTextureBufferAndPushConstants MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants - 1, // ComputeSingleTextureAndPushConstants + MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndUBO + MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndPushConstants }; return counts[static_cast(layout)]; } + /// Returns true if the given pipeline layout is used for compute shaders. + static constexpr bool IsComputeLayout(GPUPipeline::Layout layout) + { + return (layout >= GPUPipeline::Layout::ComputeMultiTextureAndUBO); + } + /// Returns the number of thread groups to dispatch for a given total count and local size. static constexpr std::tuple GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x, u32 local_size_y, u32 local_size_z) diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index dea122d83..26d81370d 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -1627,7 +1627,7 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 MetalShader temp_shader(GPUShaderStage::Compute, m_shaders, function); GPUPipeline::ComputeConfig config; - config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants; + config.layout = GPUPipeline::Layout::ComputeMultiTextureAndPushConstants; config.compute_shader = &temp_shader; std::unique_ptr pipeline = CreatePipeline(config, nullptr); diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 237a54cd4..b75c9b16f 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2966,15 +2966,25 @@ bool VulkanDevice::CreatePipelineLayouts() } } + { + VkPipelineLayout& pl = m_pipeline_layouts[0][static_cast(GPUPipeline::Layout::ComputeMultiTextureAndUBO)]; + plb.AddDescriptorSet(m_ubo_ds_layout); + plb.AddDescriptorSet(m_multi_texture_ds_layout); + plb.AddDescriptorSet(m_image_ds_layout); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Compute Multi Texture + UBO Pipeline Layout"); + } + { VkPipelineLayout& pl = - m_pipeline_layouts[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)]; - plb.AddDescriptorSet(m_single_texture_ds_layout); + m_pipeline_layouts[0][static_cast(GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)]; + plb.AddDescriptorSet(m_multi_texture_ds_layout); plb.AddDescriptorSet(m_image_ds_layout); plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; - Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout"); + Vulkan::SetObjectName(m_device, pl, "Compute Multi Texture Pipeline Layout"); } return true; @@ -3517,7 +3527,10 @@ void VulkanDevice::SetPipeline(GPUPipeline* pipeline) m_current_pipeline = static_cast(pipeline); - vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline()); + vkCmdBindPipeline(m_current_command_buffer, + IsComputeLayout(m_current_pipeline->GetLayout()) ? VK_PIPELINE_BIND_POINT_COMPUTE : + VK_PIPELINE_BIND_POINT_GRAPHICS, + m_current_pipeline->GetPipeline()); if (m_current_pipeline_layout != m_current_pipeline->GetLayout()) { @@ -3562,7 +3575,9 @@ VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const { - return m_pipeline_layouts[static_cast(GetPipelineLayoutType(m_current_render_pass_flags))] + return m_pipeline_layouts[IsComputeLayout(m_current_pipeline_layout) ? + 0 : + static_cast(GetPipelineLayoutType(m_current_render_pass_flags))] [static_cast(m_current_pipeline_layout)]; } @@ -3778,14 +3793,15 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) [[maybe_unused]] bool new_dynamic_offsets = false; constexpr VkPipelineBindPoint vk_bind_point = - ((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS : - VK_PIPELINE_BIND_POINT_COMPUTE); + (IsComputeLayout(layout) ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS); const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout(); std::array ds; u32 first_ds = 0; u32 num_ds = 0; - if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) + if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || + layout == GPUPipeline::Layout::MultiTextureAndUBO || + layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO) { new_dynamic_offsets = ((dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0); @@ -3801,8 +3817,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || - layout == GPUPipeline::Layout::SingleTextureAndPushConstants || - layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) + layout == GPUPipeline::Layout::SingleTextureAndPushConstants) { VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : static_cast(m_empty_texture.get()); @@ -3815,7 +3830,9 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) ds[num_ds++] = m_current_texture_buffer->GetDescriptorSet(); } else if constexpr (layout == GPUPipeline::Layout::MultiTextureAndUBO || - layout == GPUPipeline::Layout::MultiTextureAndPushConstants) + layout == GPUPipeline::Layout::MultiTextureAndPushConstants || + layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO || + layout == GPUPipeline::Layout::ComputeMultiTextureAndPushConstants) { Vulkan::DescriptorSetUpdateBuilder dsub; @@ -3925,8 +3942,11 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty) case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateDescriptorSetsForLayout(dirty); - case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: - return UpdateDescriptorSetsForLayout(dirty); + case GPUPipeline::Layout::ComputeMultiTextureAndUBO: + return UpdateDescriptorSetsForLayout(dirty); + + case GPUPipeline::Layout::ComputeMultiTextureAndPushConstants: + return UpdateDescriptorSetsForLayout(dirty); default: UnreachableCode();