// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "d3d12_device.h" #include "d3d12_builders.h" #include "d3d12_pipeline.h" #include "d3d12_stream_buffer.h" #include "d3d12_texture.h" #include "d3d_common.h" #include "common/align.h" #include "common/assert.h" #include "common/bitutils.h" #include "common/error.h" #include "common/file_system.h" #include "common/log.h" #include "common/path.h" #include "common/scoped_guard.h" #include "common/small_string.h" #include "common/string_util.h" #include "D3D12MemAlloc.h" #include "fmt/format.h" #include #include LOG_CHANNEL(GPUDevice); // Tweakables enum : u32 { MAX_DRAW_CALLS_PER_FRAME = 2048, MAX_DESCRIPTORS_PER_FRAME = 32768, MAX_SAMPLERS_PER_FRAME = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, MAX_PERSISTENT_DESCRIPTORS = 2048, MAX_PERSISTENT_RTVS = 512, MAX_PERSISTENT_DSVS = 128, MAX_PERSISTENT_SAMPLERS = 512, VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, INDEX_BUFFER_SIZE = 16 * 1024 * 1024, VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, UNIFORM_PUSH_CONSTANTS_SIZE = 128, MAX_UNIFORM_BUFFER_SIZE = 1024, }; // We need to synchronize instance creation because of adapter enumeration from the UI thread. static std::mutex s_instance_mutex; static constexpr GPUTexture::Format s_swap_chain_format = GPUTexture::Format::RGBA8; // We just need to keep this alive, never reference it. 
static DynamicHeapArray s_pipeline_cache_data; #ifdef ENABLE_GPU_OBJECT_NAMES #include "WinPixEventRuntime/pix3.h" static u32 s_debug_scope_depth = 0; #endif static constexpr const u32 s_mipmap_blit_vs[] = { 0x43425844, 0xe0f571cf, 0x51234ef3, 0x3a6beab4, 0x141cd2ef, 0x00000001, 0x000003ac, 0x00000005, 0x00000034, 0x00000144, 0x00000178, 0x000001d0, 0x00000310, 0x46454452, 0x00000108, 0x00000001, 0x00000068, 0x00000001, 0x0000003c, 0xfffe0500, 0x00008100, 0x000000e0, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028, 0x00000024, 0x0000000c, 0x00000000, 0x0000005c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x424f4255, 0x6b636f6c, 0xababab00, 0x0000005c, 0x00000001, 0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x000000a8, 0x00000000, 0x00000010, 0x00000002, 0x000000bc, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0x72735f75, 0x65725f63, 0x66007463, 0x74616f6c, 0xabab0034, 0x00030001, 0x00040001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000b3, 0x7263694d, 0x666f736f, 0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x00312e30, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978, 0x4e47534f, 0x00000050, 0x00000002, 0x00000008, 0x00000038, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000c03, 0x00000041, 0x00000000, 0x00000001, 0x00000003, 0x00000001, 0x0000000f, 0x43584554, 0x44524f4f, 0x5f565300, 0x69736f50, 0x6e6f6974, 0xababab00, 0x58454853, 0x00000138, 0x00010050, 0x0000004e, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x04000060, 0x00101012, 0x00000000, 0x00000006, 0x03000065, 0x00102032, 0x00000000, 0x04000067, 0x001020f2, 0x00000001, 0x00000001, 0x02000068, 0x00000001, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000001, 0x00004001, 0x00000001, 0x0010100a, 0x00000000, 0x00004001, 0x00000000, 
0x07000001, 0x00100042, 0x00000000, 0x0010100a, 0x00000000, 0x00004001, 0x00000002, 0x05000056, 0x00100032, 0x00000000, 0x00100086, 0x00000000, 0x0b000032, 0x00102032, 0x00000000, 0x00100046, 0x00000000, 0x00208ae6, 0x00000000, 0x00000000, 0x00208046, 0x00000000, 0x00000000, 0x0f000032, 0x00102032, 0x00000001, 0x00100046, 0x00000000, 0x00004002, 0x40000000, 0xc0000000, 0x00000000, 0x00000000, 0x00004002, 0xbf800000, 0x3f800000, 0x00000000, 0x00000000, 0x08000036, 0x001020c2, 0x00000001, 0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x3f800000, 0x0100003e, 0x54415453, 0x00000094, 0x00000007, 0x00000001, 0x00000000, 0x00000003, 0x00000002, 0x00000000, 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}; static constexpr const u32 s_mipmap_blit_ps[] = { 0x43425844, 0x25500f77, 0x71f24271, 0x5f83f8b8, 0x3f405943, 0x00000001, 0x0000026c, 0x00000005, 0x00000034, 0x000000f0, 0x00000124, 0x00000158, 0x000001d0, 0x46454452, 0x000000b4, 0x00000000, 0x00000000, 0x00000002, 0x0000003c, 0xffff0500, 0x00008100, 0x0000008b, 0x31314452, 0x0000003c, 0x00000018, 0x00000020, 0x00000028, 0x00000024, 0x0000000c, 0x00000000, 0x0000007c, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x00000085, 0x00000002, 0x00000005, 0x00000004, 0xffffffff, 0x00000000, 0x00000001, 0x0000000d, 0x706d6173, 0x73735f30, 0x6d617300, 0x4d003070, 0x6f726369, 0x74666f73, 0x29522820, 0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x30312072, 0xab00312e, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000303, 0x43584554, 0x44524f4f, 0xababab00, 0x4e47534f, 0x0000002c, 0x00000001, 
// Remaining words of the s_mipmap_blit_ps array (its definition starts on the previous line).
  0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x0000000f, 0x545f5653,
  0x65677261, 0xabab0074, 0x58454853, 0x00000070, 0x00000050, 0x0000001c, 0x0100086a, 0x0300005a,
  0x00106000, 0x00000000, 0x04001858, 0x00107000, 0x00000000, 0x00005555, 0x03001062, 0x00101032,
  0x00000000, 0x03000065, 0x001020f2, 0x00000000, 0x8b000045, 0x800000c2, 0x00155543, 0x001020f2,
  0x00000000, 0x00101046, 0x00000000, 0x00107e46, 0x00000000, 0x00106000, 0x00000000, 0x0100003e,
  0x54415453, 0x00000094, 0x00000002, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000,
  0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};

// Constructs the device object. No D3D12 objects are created here; that happens in
// CreateDeviceAndMainSwapChain().
D3D12Device::D3D12Device()
{
  m_render_api = RenderAPI::D3D12;

#ifdef ENABLE_GPU_OBJECT_NAMES
  // Start each device instance with a clean debug-scope nesting counter.
  s_debug_scope_depth = 0;
#endif
}

// The device must already have been torn down: DestroyDevice() resets m_device and
// deallocates s_pipeline_cache_data, which is what these assertions check.
D3D12Device::~D3D12Device()
{
  Assert(!m_device);
  Assert(s_pipeline_cache_data.empty());
}

// Serializes a root signature description (root signature version 1).
// Returns the serialized blob, or an empty ComPtr on failure. On failure `error` receives the
// HRESULT and, if the runtime produced an error blob, its contents are written to the error log.
D3D12Device::ComPtr D3D12Device::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, Error* error)
{
  ComPtr blob;
  ComPtr error_blob;
  const HRESULT hr =
    D3D12SerializeRootSignature(desc, D3D_ROOT_SIGNATURE_VERSION_1, blob.GetAddressOf(), error_blob.GetAddressOf());
  if (FAILED(hr)) [[unlikely]]
  {
    Error::SetHResult(error, "D3D12SerializeRootSignature() failed: ", hr);
    if (error_blob)
      ERROR_LOG(static_cast(error_blob->GetBufferPointer()));

    return {};
  }

  return blob;
}

// Serializes `desc` via SerializeRootSignature() and creates the root signature on m_device.
// Returns an empty ComPtr (with `error` filled in) if either step fails.
D3D12Device::ComPtr D3D12Device::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, Error* error)
{
  ComPtr blob = SerializeRootSignature(desc, error);
  if (!blob)
    return {};

  ComPtr rs;
  const HRESULT hr = m_device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(),
                                                   IID_PPV_ARGS(rs.GetAddressOf()));
  if (FAILED(hr)) [[unlikely]]
// Tail of D3D12Device::CreateRootSignature(): failure branch of the HRESULT check above.
  {
    Error::SetHResult(error, "CreateRootSignature() failed: ", hr);
    return {};
  }

  return rs;
}

// Creates the DXGI factory, the D3D12 device and every per-device object (command queue,
// memory allocator, fence, command lists, descriptor heaps, optional main swap chain,
// root signatures, buffers, timestamp query).
// Returns false with `error` set as soon as a required step fails. Nothing is released on
// these failure paths; DestroyDevice() is the function that releases these objects.
// s_instance_mutex is held for the whole call.
bool D3D12Device::CreateDeviceAndMainSwapChain(std::string_view adapter, FeatureMask disabled_features,
                                               const WindowInfo& wi, GPUVSyncMode vsync_mode,
                                               bool allow_present_throttle,
                                               const ExclusiveFullscreenMode* exclusive_fullscreen_mode,
                                               std::optional exclusive_fullscreen_control, Error* error)
{
  std::unique_lock lock(s_instance_mutex);

  m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device, error);
  if (!m_dxgi_factory)
    return false;

  // May be null; in that case the adapter is looked up from the created device's LUID below.
  m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);

  HRESULT hr = S_OK;

  // Enabling the debug layer will fail if the Graphics Tools feature is not installed.
  if (m_debug_device)
  {
    ComPtr debug12;
    hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.GetAddressOf()));
    if (SUCCEEDED(hr))
    {
      debug12->EnableDebugLayer();
    }
    else
    {
      // Not fatal: continue without the debug layer.
      ERROR_LOG("Debug layer requested but not available.");
      m_debug_device = false;
    }
  }

  // Create the actual device.
  // Feature level 12_0 is tried first, then 11_0; the first level that succeeds is kept.
  D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_1_0_CORE;
  for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_0})
  {
    hr = D3D12CreateDevice(m_adapter.Get(), try_feature_level, IID_PPV_ARGS(&m_device));
    if (SUCCEEDED(hr))
    {
      feature_level = try_feature_level;
      break;
    }
  }
  if (FAILED(hr))
  {
    Error::SetHResult(error, "Failed to create D3D12 device: ", hr);
    return false;
  }

  // No adapter was resolved by name: find the one the device was actually created on.
  if (!m_adapter)
  {
    const LUID luid(m_device->GetAdapterLuid());
    if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.GetAddressOf()))))
      ERROR_LOG("Failed to get lookup adapter by device LUID");
  }

  if (m_debug_device)
  {
    ComPtr info_queue;
    if (SUCCEEDED(m_device.As(&info_queue)))
    {
      // Only break into the debugger when one is attached.
      if (IsDebuggerPresent())
      {
        info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
        info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
      }

      // Debug-layer message IDs placed on the deny list of the storage filter pushed below.
      D3D12_INFO_QUEUE_FILTER filter = {};
      std::array id_list{
        D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
// Continuation of D3D12Device::CreateDeviceAndMainSwapChain(): rest of the denied message ID list.
        D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,
        D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,
        D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH,
        D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE,
        D3D12_MESSAGE_ID_LOADPIPELINE_NAMENOTFOUND,
      };
      filter.DenyList.NumIDs = static_cast(id_list.size());
      filter.DenyList.pIDList = id_list.data();
      info_queue->PushStorageFilter(&filter);
    }
  }

  // A single direct (graphics) queue with normal priority is used for all submissions.
  const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
                                               D3D12_COMMAND_QUEUE_FLAG_NONE, 0u};
  hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue));
  if (FAILED(hr))
  {
    Error::SetHResult(error, "Failed to create command queue: ", hr);
    return false;
  }

  // D3D12 Memory Allocator instance, created with the single-threaded and
  // default-pools-not-zeroed flags.
  D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
  allocatorDesc.pDevice = m_device.Get();
  allocatorDesc.pAdapter = m_adapter.Get();
  allocatorDesc.Flags =
    D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED |
    D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/;
  hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.GetAddressOf());
  if (FAILED(hr))
  {
    Error::SetHResult(error, "D3D12MA::CreateAllocator() failed: ", hr);
    return false;
  }

  // Fence signalled by SubmitCommandList(); its initial value is m_completed_fence_value.
  hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence));
  if (FAILED(hr))
  {
    Error::SetHResult(error, "Failed to create fence: ", hr);
    return false;
  }

  // Auto-reset, initially non-signalled event that WaitForFence() blocks on.
  m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
  if (m_fence_event == NULL)
  {
    Error::SetWin32(error, "Failed to create fence event: ", GetLastError());
    return false;
  }

  SetFeatures(feature_level, disabled_features);

  if (!CreateCommandLists(error) || !CreateDescriptorHeaps(error))
    return false;

  // A swap chain is only created when there is a window surface to present to.
  if (!wi.IsSurfaceless())
  {
    m_main_swap_chain = CreateSwapChain(wi, vsync_mode, allow_present_throttle, exclusive_fullscreen_mode,
                                        exclusive_fullscreen_control, error);
    if (!m_main_swap_chain)
      return false;
  }

  if (!CreateRootSignatures(error) || !CreateBuffers(error))
    return false;
// Tail of D3D12Device::CreateDeviceAndMainSwapChain(). The result of CreateTimestampQuery() is
// deliberately not checked here, so a timestamp-query failure does not fail device creation.
  CreateTimestampQuery();
  return true;
}

// Tears down everything created by CreateDeviceAndMainSwapChain().
// All outstanding fences are waited on first, then objects are destroyed, and finally the
// allocator, queue, device, adapter and factory references are released in that order.
// s_instance_mutex is held for the whole call.
void D3D12Device::DestroyDevice()
{
  std::unique_lock lock(s_instance_mutex);

  // Toss command list if we're recording...
  if (InRenderPass())
    EndRenderPass();

  WaitForAllFences();

  m_main_swap_chain.reset();

  // Flush everything queued for deferred destruction up to the current fence value.
  DestroyDeferredObjects(m_current_fence_value);
  DestroyTimestampQuery();
  DestroyBuffers();
  DestroyDescriptorHeaps();
  DestroyRootSignatures();
  DestroyCommandLists();

  // The pipeline library is released before the cache buffer it was created from
  // (see ReadPipelineCache(), which keeps that buffer alive in s_pipeline_cache_data).
  m_pipeline_library.Reset();
  s_pipeline_cache_data.deallocate();
  m_fence.Reset();
  if (m_fence_event != NULL)
  {
    CloseHandle(m_fence_event);
    m_fence_event = NULL;
  }

  m_allocator.Reset();
  m_command_queue.Reset();
  m_device.Reset();
  m_adapter.Reset();
  m_dxgi_factory.Reset();
}

// Fills `hdr` with the values that identify the current device for pipeline cache validation:
// the adapter LUID and m_render_api_version. The `unused` field is zeroed so whole-struct
// comparisons with memcmp() (see ReadPipelineCache()) are deterministic for that field.
void D3D12Device::GetPipelineCacheHeader(PIPELINE_CACHE_HEADER* hdr)
{
  const LUID adapter_luid = m_device->GetAdapterLuid();
  std::memcpy(&hdr->adapter_luid, &adapter_luid, sizeof(hdr->adapter_luid));
  hdr->render_api_version = m_render_api_version;
  hdr->unused = 0;
}

// Creates m_pipeline_library from previously serialized cache data.
// `data` is expected to start with a PIPELINE_CACHE_HEADER followed by the serialized library;
// the header must match the current device byte-for-byte or the cache is rejected.
// Returns false with `error` set on a header mismatch or if CreatePipelineLibrary() fails.
bool D3D12Device::ReadPipelineCache(DynamicHeapArray data, Error* error)
{
  PIPELINE_CACHE_HEADER expected_header;
  GetPipelineCacheHeader(&expected_header);
  if ((data.size() < sizeof(PIPELINE_CACHE_HEADER) ||
       std::memcmp(data.data(), &expected_header, sizeof(PIPELINE_CACHE_HEADER)) != 0))
  {
    Error::SetStringView(error, "Pipeline cache header does not match current device.");
    return false;
  }

  // The library blob is everything after the header.
  const HRESULT hr =
    m_device->CreatePipelineLibrary(&data[sizeof(PIPELINE_CACHE_HEADER)], data.size() - sizeof(PIPELINE_CACHE_HEADER),
                                    IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
  if (FAILED(hr))
  {
    Error::SetHResult(error, "CreatePipelineLibrary() failed: ", hr);
    return false;
  }

  // Have to keep the buffer around, DX doesn't take a copy.
s_pipeline_cache_data = std::move(data); return true; } bool D3D12Device::CreatePipelineCache(const std::string& path, Error* error) { const HRESULT hr = m_device->CreatePipelineLibrary(nullptr, 0, IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf())); if (FAILED(hr)) { Error::SetHResult(error, "CreatePipelineLibrary() failed: ", hr); return false; } return true; } bool D3D12Device::GetPipelineCacheData(DynamicHeapArray* data, Error* error) { if (!m_pipeline_library) return false; const size_t size = m_pipeline_library->GetSerializedSize(); if (size == 0) { WARNING_LOG("Empty serialized pipeline state returned."); return true; } PIPELINE_CACHE_HEADER header; GetPipelineCacheHeader(&header); data->resize(sizeof(PIPELINE_CACHE_HEADER) + size); std::memcpy(data->data(), &header, sizeof(PIPELINE_CACHE_HEADER)); const HRESULT hr = m_pipeline_library->Serialize(data->data() + sizeof(PIPELINE_CACHE_HEADER), size); if (FAILED(hr)) { Error::SetHResult(error, "Serialize() failed: ", hr); data->deallocate(); return false; } return true; } bool D3D12Device::CreateCommandLists(Error* error) { for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) { CommandList& res = m_command_lists[i]; HRESULT hr; for (u32 j = 0; j < 2; j++) { hr = m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(res.command_allocators[j].GetAddressOf())); if (FAILED(hr)) { Error::SetHResult(error, "CreateCommandAllocator() failed: ", hr); return false; } hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[j].Get(), nullptr, IID_PPV_ARGS(res.command_lists[j].GetAddressOf())); if (FAILED(hr)) { Error::SetHResult(error, "CreateCommandList() failed: ", hr); return false; } // Close the command lists, since the first thing we do is reset them. 
// Continuation of D3D12Device::CreateCommandLists(): close the freshly created list.
      hr = res.command_lists[j]->Close();
      if (FAILED(hr))
      {
        Error::SetHResult(error, "Close() for new command list failed: ", hr);
        return false;
      }
    }

    // Per-slot allocators for descriptors and samplers handed out while recording this slot.
    if (!res.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                         MAX_DESCRIPTORS_PER_FRAME, error))
    {
      Error::AddPrefix(error, "Failed to create per frame descriptor allocator: ");
      return false;
    }

    if (!res.sampler_allocator.Create(m_device.Get(), MAX_SAMPLERS_PER_FRAME, error))
    {
      Error::AddPrefix(error, "Failed to create per frame sampler allocator: ");
      return false;
    }
  }

  // Opens the first command list for recording.
  MoveToNextCommandList();
  return true;
}

// Advances to the next command list slot (round-robin) and opens it for recording:
// bumps the fence value, waits until the GPU has finished the slot's previous use, resets its
// allocator/list and descriptor allocators, reads back the previous timestamp pair if one was
// recorded, and rebinds the descriptor heaps.
void D3D12Device::MoveToNextCommandList()
{
  m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
  m_current_fence_value++;

  // We may have to wait if this command list hasn't finished on the GPU.
  CommandList& res = m_command_lists[m_current_command_list];
  WaitForFence(res.fence_counter);
  res.fence_counter = m_current_fence_value;
  res.init_list_used = false;

  // Begin command list.
  // NOTE(review): the HRESULTs of these two Reset() calls are not checked, unlike the
  // equivalent calls in GetInitCommandList().
  res.command_allocators[1]->Reset();
  res.command_lists[1]->Reset(res.command_allocators[1].Get(), nullptr);
  res.descriptor_allocator.Reset();
  if (res.sampler_allocator.ShouldReset())
    res.sampler_allocator.Reset();

  if (res.has_timestamp_query)
  {
    // readback timestamp from the last time this cmdlist was used.
    // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist.
// Continuation of D3D12Device::MoveToNextCommandList(): timestamp readback for this slot.
    // Byte offset of this slot's timestamp pair inside the readback buffer; this is the same
    // offset SubmitCommandList() resolves the queries to.
    const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
    const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)};
    void* map;
    HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map);
    if (SUCCEEDED(hr))
    {
      // `offset` is added again here: the code treats the mapped pointer as the start of the
      // buffer, not the start of read_range.
      u64 timestamps[2];
      std::memcpy(timestamps, static_cast(map) + offset, sizeof(timestamps));

      // timestamps[0] is written when the list is opened (below), timestamps[1] when it is
      // submitted; the tick delta is divided by m_timestamp_frequency and accumulated.
      m_accumulated_gpu_time +=
        static_cast(static_cast(timestamps[1] - timestamps[0]) / m_timestamp_frequency);

      // Empty written range: nothing was modified by the CPU.
      const D3D12_RANGE write_range = {};
      m_timestamp_query_buffer->Unmap(0, &write_range);
    }
    else
    {
      WARNING_LOG("Map() for timestamp query failed: {:08X}", static_cast(hr));
    }
  }

  // Record the start timestamp for this use of the slot, if GPU timing is enabled.
  res.has_timestamp_query = m_gpu_timing_enabled;
  if (m_gpu_timing_enabled)
  {
    res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
                                   m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST);
  }

  // Bind this slot's descriptor and sampler heaps on the freshly reset list.
  ID3D12DescriptorHeap* heaps[2] = {res.descriptor_allocator.GetDescriptorHeap(),
                                    res.sampler_allocator.GetDescriptorHeap()};
  res.command_lists[1]->SetDescriptorHeaps(static_cast(std::size(heaps)), heaps);

  m_allocator->SetCurrentFrameIndex(static_cast(m_current_fence_value));
  InvalidateCachedState();
}

// Destroys the per-slot descriptor/sampler allocators and releases both command lists and
// command allocators of every slot.
void D3D12Device::DestroyCommandLists()
{
  for (CommandList& resources : m_command_lists)
  {
    resources.descriptor_allocator.Destroy();
    resources.sampler_allocator.Destroy();
    for (u32 i = 0; i < 2; i++)
    {
      resources.command_lists[i].Reset();
      resources.command_allocators[i].Reset();
    }
  }
}

// Creates the four persistent descriptor heap managers (CBV/SRV/UAV, RTV, DSV, sampler), the
// null SRV/UAV descriptors, and initializes every entry of m_current_samplers to the default
// sampler's descriptor. Returns false with `error` set on failure.
bool D3D12Device::CreateDescriptorHeaps(Error* error)
{
  if (!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                        MAX_PERSISTENT_DESCRIPTORS, false, error) ||
      !m_rtv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_PERSISTENT_RTVS, false, error) ||
      !m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_PERSISTENT_DSVS, false, error) ||
      !m_sampler_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
// Continuation of D3D12Device::CreateDescriptorHeaps(): last arguments of the sampler heap Create().
                                     MAX_PERSISTENT_SAMPLERS, false, error))
  {
    return false;
  }

  // Allocate null SRV descriptor for unbound textures.
  static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {
    DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}};
  if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor))
  {
    Error::SetStringView(error, "Failed to allocate null SRV descriptor");
    return false;
  }
  // A null resource pointer with a valid view description creates a null descriptor.
  m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle);

  // Same for UAVs.
  static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = {
    DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}};
  if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor))
  {
    Error::SetStringView(error, "Failed to allocate null UAV descriptor");
    return false;
  }
  m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle);

  // Same for samplers.
  // Every sampler slot starts out pointing at the nearest-config sampler's descriptor.
  GPUSampler* default_sampler = GetSampler(GPUSampler::GetNearestConfig(), error);
  if (!default_sampler) [[unlikely]]
    return false;
  for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
    m_current_samplers[i] = static_cast(default_sampler)->GetDescriptor();

  return true;
}

// Frees the null UAV/SRV descriptors (if they were allocated) and destroys the heap managers
// in the reverse order of their creation in CreateDescriptorHeaps().
void D3D12Device::DestroyDescriptorHeaps()
{
  if (m_null_uav_descriptor)
    m_descriptor_heap_manager.Free(&m_null_uav_descriptor);
  if (m_null_srv_descriptor)
    m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
  m_sampler_heap_manager.Destroy();
  m_dsv_heap_manager.Destroy();
  m_rtv_heap_manager.Destroy();
  m_descriptor_heap_manager.Destroy();
}

// Returns the current slot's "init" command list (index 0), resetting its allocator and list
// the first time it is requested after the slot became current. SubmitCommandList() executes
// this list before the main list (index 1) when it has been used.
ID3D12GraphicsCommandList4* D3D12Device::GetInitCommandList()
{
  CommandList& res = m_command_lists[m_current_command_list];
  if (!res.init_list_used)
  {
    HRESULT hr = res.command_allocators[0]->Reset();
    AssertMsg(SUCCEEDED(hr), "Reset init command allocator failed");

    hr = res.command_lists[0]->Reset(res.command_allocators[0].Get(), nullptr);
    AssertMsg(SUCCEEDED(hr), "Reset init command list failed");
    res.init_list_used = true;
// Tail of D3D12Device::GetInitCommandList().
  }

  return res.command_lists[0].Get();
}

// Closes and executes the current slot's command list(s), signals the fence with the slot's
// fence counter, and moves on to the next slot.
// If `wait_for_completion` is true, blocks until the GPU has finished the submitted work.
// Must not be called inside a render pass. Does nothing once the device has been flagged as
// lost; any failure here sets that flag and returns without advancing.
void D3D12Device::SubmitCommandList(bool wait_for_completion)
{
  DebugAssert(!InRenderPass());
  if (m_device_was_lost) [[unlikely]]
    return;

  CommandList& res = m_command_lists[m_current_command_list];
  HRESULT hr;

  if (res.has_timestamp_query)
  {
    // write the timestamp back at the end of the cmdlist
    // Query index +1 is the end timestamp; both queries of this slot are then resolved into
    // the readback buffer at the slot's byte offset (read in MoveToNextCommandList()).
    res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
                                   (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1);
    res.command_lists[1]->ResolveQueryData(
      m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
      m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, NUM_TIMESTAMP_QUERIES_PER_CMDLIST,
      m_timestamp_query_buffer.Get(), m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
  }

  // TODO: error handling
  if (res.init_list_used)
  {
    hr = res.command_lists[0]->Close();
    if (FAILED(hr)) [[unlikely]]
    {
      ERROR_LOG("Closing init command list failed with HRESULT {:08X}", static_cast(hr));
      m_device_was_lost = true;
      return;
    }
  }

  // Close and queue command list.
  hr = res.command_lists[1]->Close();
  if (FAILED(hr)) [[unlikely]]
  {
    ERROR_LOG("Closing main command list failed with HRESULT {:08X}", static_cast(hr));
    m_device_was_lost = true;
    return;
  }

  // The init list, when used, is executed ahead of the main list in the same call.
  if (res.init_list_used)
  {
    const std::array execute_lists{res.command_lists[0].Get(), res.command_lists[1].Get()};
    m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data());
  }
  else
  {
    const std::array execute_lists{res.command_lists[1].Get()};
    m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data());
  }

  // Update fence when GPU has completed.
// Tail of D3D12Device::SubmitCommandList(bool): signal the fence and advance to the next slot.
  hr = m_command_queue->Signal(m_fence.Get(), res.fence_counter);
  if (FAILED(hr))
  {
    ERROR_LOG("Signal command queue fence failed with HRESULT {:08X}", static_cast(hr));
    m_device_was_lost = true;
    return;
  }

  MoveToNextCommandList();

  // `res` was bound before MoveToNextCommandList() advanced m_current_command_list, so it still
  // refers to the slot that was just submitted; this waits on the value signalled above.
  if (wait_for_completion)
    WaitForFence(res.fence_counter);
}

// Same as SubmitCommandList(bool), but first logs `reason` at warning level.
void D3D12Device::SubmitCommandList(bool wait_for_completion, const std::string_view reason)
{
  WARNING_LOG("Executing command buffer due to '{}'", reason);
  SubmitCommandList(wait_for_completion);
}

// Ends the current render pass (if any), submits without waiting, then re-applies the pipeline
// that was bound before the submit and begins a new render pass.
void D3D12Device::SubmitCommandListAndRestartRenderPass(const std::string_view reason)
{
  if (InRenderPass())
    EndRenderPass();

  // Saved before submitting and re-applied afterwards via SetPipeline().
  D3D12Pipeline* pl = m_current_pipeline;
  SubmitCommandList(false, reason);

  SetPipeline(pl);
  BeginRenderPass();
}

// Blocks until the fence has reached `fence`, then destroys deferred objects whose fence value
// has completed. Returns immediately if the device is lost or the cached completed value
// already covers `fence`.
void D3D12Device::WaitForFence(u64 fence)
{
  if (m_device_was_lost) [[unlikely]]
    return;

  if (m_completed_fence_value >= fence)
    return;

  // Try non-blocking check.
  m_completed_fence_value = m_fence->GetCompletedValue();
  if (m_completed_fence_value < fence)
  {
    // Fall back to event.
    HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event);
    AssertMsg(SUCCEEDED(hr), "Set fence event on completion");
    WaitForSingleObject(m_fence_event, INFINITE);
    m_completed_fence_value = m_fence->GetCompletedValue();
  }

  // Release resources for as many command lists which have completed.
// Tail of D3D12Device::WaitForFence().
  DestroyDeferredObjects(m_completed_fence_value);
}

// Waits on the fence counters of every command list slot except the current one, starting with
// the slot after the current one and wrapping around.
void D3D12Device::WaitForAllFences()
{
  u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
  for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++)
  {
    WaitForFence(m_command_lists[index].fence_counter);
    index = (index + 1) % NUM_COMMAND_LISTS;
  }
}

// Ends any active render pass, submits the current command list without waiting for the GPU,
// and trims the texture pool.
void D3D12Device::FlushCommands()
{
  if (InRenderPass())
    EndRenderPass();

  SubmitCommandList(false);
  TrimTexturePool();
}

// Ends any active render pass, submits the current command list and waits for it to complete.
void D3D12Device::WaitForGPUIdle()
{
  if (InRenderPass())
    EndRenderPass();

  SubmitCommandList(true);
}

// Creates the timestamp query heap and its readback buffer (NUM_TIMESTAMP_QUERIES_PER_CMDLIST
// u64 values per command list slot) and caches the queue's timestamp frequency.
// On any failure m_features.gpu_timing is cleared and false is returned.
bool D3D12Device::CreateTimestampQuery()
{
  constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS;
  constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT;

  const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT, 0u};
  HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.GetAddressOf()));
  if (FAILED(hr))
  {
    ERROR_LOG("CreateQueryHeap() for timestamp failed with {:08X}", static_cast(hr));
    m_features.gpu_timing = false;
    return false;
  }

  // Readback-heap buffer that ResolveQueryData() writes into and the CPU maps for reading.
  const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK,
                                                    D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
  const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
                                             0,
                                             BUFFER_SIZE,
                                             1,
                                             1,
                                             1,
                                             DXGI_FORMAT_UNKNOWN,
                                             {1, 0},
                                             D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
                                             D3D12_RESOURCE_FLAG_NONE};
  hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
                                   m_timestamp_query_allocation.GetAddressOf(),
                                   IID_PPV_ARGS(m_timestamp_query_buffer.GetAddressOf()));
  if (FAILED(hr))
  {
    ERROR_LOG("CreateResource() for timestamp failed with {:08X}", static_cast(hr));
    m_features.gpu_timing = false;
    return false;
  }

  u64 frequency;
  hr = m_command_queue->GetTimestampFrequency(&frequency);
  if (FAILED(hr))
  {
    ERROR_LOG("GetTimestampFrequency() failed: {:08X}", static_cast(hr));
    m_features.gpu_timing = false;
    return false;
  }

  // Stored as frequency / 1000. GetTimestampFrequency() is documented to report ticks per
  // second, so tick deltas divided by this value are presumably milliseconds.
  m_timestamp_frequency = static_cast(frequency) / 1000.0;
  return
true; // operand of the `return` that ends D3D12Device::CreateTimestampQuery()
}

// Releases the timestamp readback buffer, its allocation and the query heap.
void D3D12Device::DestroyTimestampQuery()
{
  m_timestamp_query_buffer.Reset();
  m_timestamp_query_allocation.Reset();
  m_timestamp_query_heap.Reset();
}

// Returns the GPU time accumulated by MoveToNextCommandList() since the last call and resets
// the accumulator to zero.
float D3D12Device::GetAndResetAccumulatedGPUTime()
{
  const float time = m_accumulated_gpu_time;
  m_accumulated_gpu_time = 0.0f;
  return time;
}

// Enables or disables GPU timing. Timing can only be enabled when m_features.gpu_timing is
// set; the return value is true when the requested state was actually applied.
bool D3D12Device::SetGPUTimingEnabled(bool enabled)
{
  m_gpu_timing_enabled = enabled && m_features.gpu_timing;
  return (enabled == m_gpu_timing_enabled);
}

// Queues `resource` for release once the current fence value has completed. Ownership of the
// reference is transferred to the cleanup queue (Detach()); no allocation is associated.
void D3D12Device::DeferObjectDestruction(ComPtr resource)
{
  DebugAssert(resource);
  m_cleanup_resources.emplace_back(GetCurrentFenceValue(),
                                   std::pair(nullptr, resource.Detach()));
}

// Queues a resource together with its allocation for release once the current fence value has
// completed. Both references are transferred to the cleanup queue.
void D3D12Device::DeferResourceDestruction(ComPtr allocation, ComPtr resource)
{
  DebugAssert(allocation && resource);
  m_cleanup_resources.emplace_back(
    GetCurrentFenceValue(), std::pair(allocation.Detach(), resource.Detach()));
}

// Queues `descriptor` to be freed back to `heap` once the current fence value has completed,
// and clears the caller's handle immediately so it cannot be used again.
void D3D12Device::DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor)
{
  DebugAssert(descriptor->index != D3D12DescriptorHandle::INVALID_INDEX);
  m_cleanup_descriptors.emplace_back(GetCurrentFenceValue(),
                                     std::pair(&heap, *descriptor));
  descriptor->Clear();
}

// Frees/releases queued objects from the front of both cleanup queues, stopping at the first
// entry whose fence value is greater than `fence_value`.
void D3D12Device::DestroyDeferredObjects(u64 fence_value)
{
  while (!m_cleanup_descriptors.empty())
  {
    auto& it = m_cleanup_descriptors.front();
    if (it.first > fence_value)
      break;

    // it.second = (heap manager, descriptor handle)
    it.second.first->Free(it.second.second.index);
    m_cleanup_descriptors.pop_front();
  }

  while (!m_cleanup_resources.empty())
  {
    auto& it = m_cleanup_resources.front();
    if (it.first > fence_value)
      break;

    // it.second = (allocation or null, resource); the resource is released first.
    it.second.second->Release();
    if (it.second.first)
      it.second.first->Release();
    m_cleanup_resources.pop_front();
  }
}

// Stores the window/vsync settings in the base class and, if an exclusive fullscreen mode was
// requested, resolves it immediately. The DXGI swap chain itself is created later by
// CreateSwapChain().
D3D12SwapChain::D3D12SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle,
                               const GPUDevice::ExclusiveFullscreenMode* fullscreen_mode)
  : GPUSwapChain(wi, vsync_mode, allow_present_throttle)
{
  if (fullscreen_mode)
    InitializeExclusiveFullscreenMode(fullscreen_mode);
}

// Destroys the render target views first, then the swap chain itself.
D3D12SwapChain::~D3D12SwapChain()
{
// Body of D3D12SwapChain::~D3D12SwapChain().
  DestroyRTVs();
  DestroySwapChain();
}

// Resolves the requested exclusive fullscreen mode into a DXGI mode description for the
// window's current client rectangle, storing the result in m_fullscreen_mode and the target
// output in m_fullscreen_output. Returns true if a mode description was obtained.
bool D3D12SwapChain::InitializeExclusiveFullscreenMode(const GPUDevice::ExclusiveFullscreenMode* mode)
{
  const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(s_swap_chain_format);

  const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle);
  RECT client_rc{};
  GetClientRect(window_hwnd, &client_rc);

  m_fullscreen_mode = D3DCommon::GetRequestedExclusiveFullscreenModeDesc(
    D3D12Device::GetInstance().GetDXGIFactory(), client_rc, mode, fm.resource_format,
    m_fullscreen_output.GetAddressOf());
  return m_fullscreen_mode.has_value();
}

// Returns the number of swap chain buffers to use for `vsync_mode`.
u32 D3D12SwapChain::GetNewBufferCount(GPUVSyncMode vsync_mode)
{
  // With vsync off, we only need two buffers. Same for blocking vsync.
  // With triple buffering, we need three.
  return (vsync_mode == GPUVSyncMode::Mailbox) ? 3 : 2;
}

// Creates the DXGI swap chain for the window: an exclusive fullscreen chain when a fullscreen
// mode is set (falling back to windowed if that fails), otherwise a windowed flip-discard
// chain. Returns false with `error` set only if the windowed creation fails.
bool D3D12SwapChain::CreateSwapChain(D3D12Device& dev, Error* error)
{
  const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(s_swap_chain_format);

  const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle);
  RECT client_rc{};
  GetClientRect(window_hwnd, &client_rc);

  // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
// Continuation of D3D12SwapChain::CreateSwapChain(D3D12Device&, Error*).
  if (IsExclusiveFullscreen() && m_vsync_mode == GPUVSyncMode::Mailbox)
  {
    WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    m_vsync_mode = GPUVSyncMode::FIFO;
  }

  // Description shared by the windowed and fullscreen paths: sized to the window's client
  // area, with a buffer count derived from the vsync mode.
  DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
  swap_chain_desc.Width = static_cast(client_rc.right - client_rc.left);
  swap_chain_desc.Height = static_cast(client_rc.bottom - client_rc.top);
  swap_chain_desc.Format = fm.resource_format;
  swap_chain_desc.SampleDesc.Count = 1;
  swap_chain_desc.BufferCount = GetNewBufferCount(m_vsync_mode);
  swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
  swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;

  HRESULT hr = S_OK;

  if (IsExclusiveFullscreen())
  {
    // Fullscreen variant: uses the resolved mode's size and refresh settings and allows
    // mode switches.
    DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc;
    DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {};

    fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
    fs_sd_desc.Width = m_fullscreen_mode->Width;
    fs_sd_desc.Height = m_fullscreen_mode->Height;
    fs_desc.RefreshRate = m_fullscreen_mode->RefreshRate;
    fs_desc.ScanlineOrdering = m_fullscreen_mode->ScanlineOrdering;
    fs_desc.Scaling = m_fullscreen_mode->Scaling;
    fs_desc.Windowed = FALSE;

    VERBOSE_LOG("Creating a {}x{} exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height);
    hr = dev.GetDXGIFactory()->CreateSwapChainForHwnd(dev.GetCommandQueue(), window_hwnd, &fs_sd_desc, &fs_desc,
                                                      m_fullscreen_output.Get(),
                                                      m_swap_chain.ReleaseAndGetAddressOf());
    if (FAILED(hr))
    {
      // Clearing the mode/output here is what routes execution into the windowed path below.
      WARNING_LOG("Failed to create fullscreen swap chain, trying windowed.");
      m_fullscreen_output.Reset();
      m_fullscreen_mode.reset();
    }
  }

  if (!IsExclusiveFullscreen())
  {
    VERBOSE_LOG("Creating a {}x{} windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height);

    // The allow-tearing flag is only requested when the factory reports support for it; the
    // result is remembered because ResizeBuffers() must pass the same flag.
    m_using_allow_tearing = D3DCommon::SupportsAllowTearing(dev.GetDXGIFactory());
    if (m_using_allow_tearing)
      swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;

    hr = dev.GetDXGIFactory()->CreateSwapChainForHwnd(dev.GetCommandQueue(), window_hwnd, &swap_chain_desc, nullptr,
                                                      nullptr,
m_swap_chain.ReleaseAndGetAddressOf()); if (FAILED(hr)) { Error::SetHResult(error, "CreateSwapChainForHwnd() failed: ", hr); return false; } } hr = dev.GetDXGIFactory()->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES); if (FAILED(hr)) WARNING_LOG("MakeWindowAssociation() to disable ALT+ENTER failed"); return true; } bool D3D12SwapChain::CreateRTV(D3D12Device& dev, Error* error) { DXGI_SWAP_CHAIN_DESC swap_chain_desc; HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc); if (FAILED(hr)) { Error::SetHResult(error, "GetDesc() for swap chain failed: ", hr); return false; } const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {swap_chain_desc.BufferDesc.Format, D3D12_RTV_DIMENSION_TEXTURE2D, {}}; for (u32 i = 0; i < swap_chain_desc.BufferCount; i++) { ComPtr backbuffer; hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf())); if (FAILED(hr)) { Error::SetHResult(error, "GetBuffer for RTV failed: ", hr); DestroyRTVs(); return false; } D3D12::SetObjectName(backbuffer.Get(), TinyString::from_format("Swap Chain Buffer #{}", i)); D3D12DescriptorHandle rtv; if (!dev.GetRTVHeapManager().Allocate(&rtv)) { Error::SetStringView(error, "Failed to allocate RTV handle."); DestroyRTVs(); return false; } dev.GetDevice()->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, rtv); m_swap_chain_buffers.emplace_back(std::move(backbuffer), rtv); } m_window_info.surface_width = static_cast(swap_chain_desc.BufferDesc.Width); m_window_info.surface_height = static_cast(swap_chain_desc.BufferDesc.Height); m_window_info.surface_format = s_swap_chain_format; VERBOSE_LOG("Swap chain buffer size: {}x{}", m_window_info.surface_width, m_window_info.surface_height); BOOL fullscreen = FALSE; DXGI_SWAP_CHAIN_DESC desc; if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && SUCCEEDED(m_swap_chain->GetDesc(&desc))) { m_window_info.surface_refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / 
static_cast(desc.BufferDesc.RefreshRate.Denominator); } m_current_swap_chain_buffer = 0; return true; } void D3D12SwapChain::DestroyRTVs() { if (m_swap_chain_buffers.empty()) return; D3D12Device& dev = D3D12Device::GetInstance(); // Runtime gets cranky if we don't submit the current buffer... if (dev.InRenderPass()) dev.EndRenderPass(); dev.SubmitCommandList(true); for (auto it = m_swap_chain_buffers.rbegin(); it != m_swap_chain_buffers.rend(); ++it) { dev.GetRTVHeapManager().Free(it->second.index); it->first.Reset(); } m_swap_chain_buffers.clear(); m_current_swap_chain_buffer = 0; } void D3D12SwapChain::DestroySwapChain() { if (!m_swap_chain) return; // switch out of fullscreen before destroying BOOL is_fullscreen; if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen) m_swap_chain->SetFullscreenState(FALSE, nullptr); m_swap_chain.Reset(); } bool D3D12SwapChain::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle, Error* error) { m_allow_present_throttle = allow_present_throttle; // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen. if (mode == GPUVSyncMode::Mailbox && IsExclusiveFullscreen()) { WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen."); mode = GPUVSyncMode::FIFO; } if (m_vsync_mode == mode) return true; const u32 old_buffer_count = GetNewBufferCount(m_vsync_mode); const u32 new_buffer_count = GetNewBufferCount(mode); m_vsync_mode = mode; if (old_buffer_count == new_buffer_count) return true; // Buffer count change => needs recreation. 
DestroyRTVs(); DestroySwapChain(); D3D12Device& dev = D3D12Device::GetInstance(); return CreateSwapChain(dev, error) && CreateRTV(dev, error); } bool D3D12SwapChain::ResizeBuffers(u32 new_width, u32 new_height, float new_scale, Error* error) { m_window_info.surface_scale = new_scale; if (m_window_info.surface_width == new_width && m_window_info.surface_height == new_height) return true; DestroyRTVs(); HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); if (FAILED(hr)) ERROR_LOG("ResizeBuffers() failed: 0x{:08X}", static_cast(hr)); return CreateRTV(D3D12Device::GetInstance(), error); } std::unique_ptr D3D12Device::CreateSwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle, const ExclusiveFullscreenMode* exclusive_fullscreen_mode, std::optional exclusive_fullscreen_control, Error* error) { std::unique_ptr ret; if (wi.type != WindowInfo::Type::Win32) { Error::SetStringView(error, "Cannot create a swap chain on non-win32 window."); return ret; } ret = std::make_unique(wi, vsync_mode, allow_present_throttle, exclusive_fullscreen_mode); if (ret->CreateSwapChain(*this, error) && ret->CreateRTV(*this, error)) { // Render a frame as soon as possible to clear out whatever was previously being displayed. 
RenderBlankFrame(ret.get()); } else { ret.reset(); } return ret; } void D3D12Device::RenderBlankFrame(D3D12SwapChain* swap_chain) { if (InRenderPass()) EndRenderPass(); const D3D12SwapChain::BufferPair& swap_chain_buf = swap_chain->GetCurrentBuffer(); ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_RENDER_TARGET); cmdlist->ClearRenderTargetView(swap_chain_buf.second, GSVector4::cxpr(0.0f, 0.0f, 0.0f, 1.0f).F32, 0, nullptr); D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); SubmitCommandList(false); swap_chain->GetSwapChain()->Present(0, swap_chain->IsUsingAllowTearing() ? DXGI_PRESENT_ALLOW_TEARING : 0); swap_chain->AdvanceBuffer(); } bool D3D12Device::SupportsTextureFormat(GPUTexture::Format format) const { constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format; if (dfmt == DXGI_FORMAT_UNKNOWN) return false; D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {dfmt, {}, {}}; return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))) && (support.Support1 & required) == required; } std::string D3D12Device::GetDriverInfo() const { std::string ret = fmt::format("{} (Shader Model {})\n", D3DCommon::GetFeatureLevelString(m_render_api_version), D3DCommon::GetShaderModelForFeatureLevelNumber(m_render_api_version)); DXGI_ADAPTER_DESC desc; if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc))) { fmt::format_to(std::back_inserter(ret), "VID: 0x{:04X} PID: 0x{:04X}\n", desc.VendorId, desc.DeviceId); ret += StringUtil::WideStringToUTF8String(desc.Description); ret += "\n"; const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid)); if 
(!driver_version.empty())
    {
      ret += "Driver Version: ";
      ret += driver_version;
    }
  }

  return ret;
}

/// Starts rendering to the swap chain's current back buffer.
/// Ends any active render pass, checks for device/fullscreen loss, transitions the back buffer to the render target
/// state and opens a render pass that clears it to clear_color.
/// @return DeviceLost if the device was previously flagged as lost, ExclusiveFullscreenLost if the swap chain should
///         be in exclusive fullscreen but no longer is, otherwise OK.
GPUDevice::PresentResult D3D12Device::BeginPresent(GPUSwapChain* swap_chain, u32 clear_color)
{
  D3D12SwapChain* const SC = static_cast(swap_chain);
  if (InRenderPass())
    EndRenderPass();

  if (m_device_was_lost) [[unlikely]]
    return PresentResult::DeviceLost;

  // TODO: Check if the device was lost.

  // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
  // This might get called repeatedly if it takes a while to switch back, that's the host's problem.
  // is_fullscreen is only read when GetFullscreenState() succeeded (short-circuit on FAILED()).
  BOOL is_fullscreen;
  if (SC->IsExclusiveFullscreen() &&
      (FAILED(SC->GetSwapChain()->GetFullscreenState(&is_fullscreen, nullptr)) || !is_fullscreen))
  {
    FlushCommands();
    TrimTexturePool();
    return PresentResult::ExclusiveFullscreenLost;
  }

  // Remembered so BeginRenderPass()/EndPresent() can find the buffer being presented.
  m_current_swap_chain = SC;

  const D3D12SwapChain::BufferPair& swap_chain_buf = SC->GetCurrentBuffer();
  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();

  D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);

  // All textures should be in shader read only optimal already, but just in case..
  const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 i = 0; i < num_textures; i++)
  {
    if (m_current_textures[i])
      m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  // Single colour target: clear on begin, preserve on end; no depth target.
  D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {swap_chain_buf.second,
                                                  {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, {}},
                                                  {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
  GSVector4::store(rt_desc.BeginningAccess.Clear.ClearValue.Color, GSVector4::rgba32(clear_color));
  cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);

  // Forget the previously bound targets; zero colour targets plus no depth target is the state that
  // BeginRenderPass() treats as "rendering to the swap chain".
  std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
  m_num_current_render_targets = 0;
  // Drop the RT-UAV dirty bit, and force a layout rebind if the ROV root signature was in use.
  m_dirty_flags =
    (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS) | ((IsUsingROVRootSignature()) ? DIRTY_FLAG_PIPELINE_LAYOUT : 0);
  m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
  m_current_depth_target = nullptr;
  m_in_render_pass = true;
  s_stats.num_render_passes++;

  // Clear pipeline, it's likely incompatible.
  m_current_pipeline = nullptr;

  return PresentResult::OK;
}

/// Finishes the frame started by BeginPresent(): ends the render pass, transitions the back buffer to the present
/// state, submits the command list and, unless explicit_present is set, presents immediately.
/// @param present_time Must be zero (asserted in debug builds).
void D3D12Device::EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time)
{
  D3D12SwapChain* const SC = static_cast(swap_chain);
  DebugAssert(present_time == 0);
  DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
  EndRenderPass();

  DebugAssert(SC == m_current_swap_chain);
  m_current_swap_chain = nullptr;

  // Grab the buffer that was rendered to before advancing; the reference is still used for the barrier below.
  const D3D12SwapChain::BufferPair& swap_chain_buf = SC->GetCurrentBuffer();
  SC->AdvanceBuffer();

  ID3D12GraphicsCommandList* cmdlist = GetCommandList();
  D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT);

  SubmitCommandList(false);
  TrimTexturePool();

  if (!explicit_present)
    SubmitPresent(swap_chain);
}

/// Presents the swap chain. The sync interval is 1 for FIFO and 0 otherwise; tearing is requested only when vsync
/// is disabled and the swap chain was created with tearing support.
void D3D12Device::SubmitPresent(GPUSwapChain* swap_chain)
{
  D3D12SwapChain* const SC = static_cast(swap_chain);
  if (m_device_was_lost) [[unlikely]]
    return;

  const UINT sync_interval = static_cast(SC->GetVSyncMode() == GPUVSyncMode::FIFO);
  const UINT flags = (SC->GetVSyncMode() == GPUVSyncMode::Disabled && SC->IsUsingAllowTearing()) ?
DXGI_PRESENT_ALLOW_TEARING : 0; SC->GetSwapChain()->Present(sync_interval, flags); } #ifdef ENABLE_GPU_OBJECT_NAMES static UINT64 Palette(float phase, const std::array& a, const std::array& b, const std::array& c, const std::array& d) { std::array result; result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0])); result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1])); result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2])); return PIX_COLOR(static_cast(std::clamp(result[0] * 255.0f, 0.0f, 255.0f)), static_cast(std::clamp(result[1] * 255.0f, 0.0f, 255.0f)), static_cast(std::clamp(result[2] * 255.0f, 0.0f, 255.0f))); } void D3D12Device::PushDebugGroup(const char* name) { if (!m_debug_device) return; const UINT64 color = Palette(static_cast(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f}); PIXBeginEvent(GetCommandList(), color, "%s", name); } void D3D12Device::PopDebugGroup() { if (!m_debug_device) return; s_debug_scope_depth = (s_debug_scope_depth == 0) ? 
0 : (s_debug_scope_depth - 1u); PIXEndEvent(GetCommandList()); } void D3D12Device::InsertDebugMessage(const char* msg) { if (!m_debug_device) return; PIXSetMarker(GetCommandList(), PIX_COLOR(0, 0, 0), "%s", msg); } #endif void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disabled_features) { m_render_api_version = D3DCommon::GetRenderAPIVersionForFeatureLevel(feature_level); m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; m_max_multisamples = 1; for (u32 multisamples = 2; multisamples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) { D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS fd = {DXGI_FORMAT_R8G8B8A8_UNORM, static_cast(multisamples), D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0u}; if (SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &fd, sizeof(fd))) && fd.NumQualityLevels > 0) { m_max_multisamples = multisamples; } } m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND); m_features.framebuffer_fetch = false; m_features.per_sample_shading = true; m_features.noperspective_interpolation = true; m_features.texture_copy_to_self = /*!(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF)*/ false; // TODO: Support with Enhanced Barriers m_features.texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); m_features.texture_buffers_emulated_with_ssbo = false; m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = false; m_features.explicit_present = true; m_features.timed_present = false; m_features.gpu_timing = true; m_features.shader_cache = true; m_features.pipeline_cache = true; m_features.prefer_unused_textures = true; m_features.raster_order_views = false; if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS)) { 
D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; m_features.raster_order_views = SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) && options.ROVsSupported; } m_features.dxt_textures = (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) && SupportsTextureFormat(GPUTexture::Format::BC3))); m_features.bptc_textures = (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7)); } void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) { D3D12Texture* const S = static_cast(src); D3D12Texture* const D = static_cast(dst); if (S->GetState() == GPUTexture::State::Cleared) { // source is cleared. if destination is a render target, we can carry the clear forward if (D->IsRenderTargetOrDepthStencil()) { if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight()) { // pass it forward if we're clearing the whole thing if (S->IsDepthStencil()) D->SetClearDepth(S->GetClearDepth()); else D->SetClearColor(S->GetClearColor()); return; } if (D->GetState() == GPUTexture::State::Cleared) { // destination is cleared, if it's the same colour and rect, we can just avoid this entirely if (D->IsDepthStencil()) { if (D->GetClearDepth() == S->GetClearDepth()) return; } else { if (D->GetClearColor() == S->GetClearColor()) return; } } } // commit the clear to the source first, then do normal copy S->CommitClear(); } // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first // (the area outside of where we're copying to) if (D->GetState() == GPUTexture::State::Cleared && (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight())) { 
D->CommitClear(); } s_stats.num_copies++; // *now* we can do a normal image copy. if (InRenderPass()) EndRenderPass(); S->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); S->SetUseFenceValue(GetCurrentFenceValue()); D->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); D->SetUseFenceValue(GetCurrentFenceValue()); D3D12_TEXTURE_COPY_LOCATION srcloc; srcloc.pResource = S->GetResource(); srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; srcloc.SubresourceIndex = S->CalculateSubresource(src_layer, src_level); D3D12_TEXTURE_COPY_LOCATION dstloc; dstloc.pResource = D->GetResource(); dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dstloc.SubresourceIndex = D->CalculateSubresource(dst_layer, dst_level); const D3D12_BOX srcbox{static_cast(src_x), static_cast(src_y), 0u, static_cast(src_x + width), static_cast(src_y + height), 1u}; GetCommandList()->CopyTextureRegion(&dstloc, dst_x, dst_y, 0, &srcloc, &srcbox); D->SetState(GPUTexture::State::Dirty); } void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) { DebugAssert((src_x + width) <= src->GetWidth()); DebugAssert((src_y + height) <= src->GetHeight()); DebugAssert(src->IsMultisampled()); DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); if (InRenderPass()) EndRenderPass(); s_stats.num_copies++; D3D12Texture* D = static_cast(dst); D3D12Texture* S = static_cast(src); ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); const u32 DSR = D->CalculateSubresource(dst_layer, dst_level); S->CommitClear(cmdlist); D->CommitClear(cmdlist); S->TransitionSubresourceToState(cmdlist, 0, S->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE); D->TransitionSubresourceToState(cmdlist, DSR, 
D->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_DEST); if (src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 && width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level)) { cmdlist->ResolveSubresource(D->GetResource(), DSR, S->GetResource(), 0, S->GetDXGIFormat()); } else { D3D12_RECT src_rc{static_cast(src_x), static_cast(src_y), static_cast(src_x + width), static_cast(src_y + height)}; cmdlist->ResolveSubresourceRegion(D->GetResource(), D->CalculateSubresource(dst_level, dst_layer), dst_x, dst_y, S->GetResource(), 0, &src_rc, D->GetDXGIFormat(), D3D12_RESOLVE_MODE_AVERAGE); } S->TransitionSubresourceToState(cmdlist, 0, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, S->GetResourceState()); D->TransitionSubresourceToState(cmdlist, DSR, D3D12_RESOURCE_STATE_RESOLVE_DEST, D->GetResourceState()); } void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c) { GPUDevice::ClearRenderTarget(t, c); if (InRenderPass() && IsRenderTargetBound(t)) EndRenderPass(); } void D3D12Device::ClearDepth(GPUTexture* t, float d) { GPUDevice::ClearDepth(t, d); if (InRenderPass() && m_current_depth_target == t) EndRenderPass(); } void D3D12Device::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); if (InRenderPass() && (t->IsDepthStencil() ? 
(m_current_depth_target == t) : IsRenderTargetBound(t))) EndRenderPass(); } bool D3D12Device::CreateBuffers(Error* error) { if (!m_vertex_buffer.Create(VERTEX_BUFFER_SIZE, error)) { ERROR_LOG("Failed to allocate vertex buffer"); return false; } if (!m_index_buffer.Create(INDEX_BUFFER_SIZE, error)) { ERROR_LOG("Failed to allocate index buffer"); return false; } if (!m_uniform_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE, error)) { ERROR_LOG("Failed to allocate uniform buffer"); return false; } if (!m_texture_upload_buffer.Create(TEXTURE_BUFFER_SIZE, error)) { ERROR_LOG("Failed to allocate texture upload buffer"); return false; } return true; } void D3D12Device::DestroyBuffers() { m_texture_upload_buffer.Destroy(false); m_uniform_buffer.Destroy(false); m_index_buffer.Destroy(false); m_vertex_buffer.Destroy(false); } void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex) { const u32 req_size = vertex_size * vertex_count; if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) { SubmitCommandListAndRestartRenderPass("out of vertex space"); if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) Panic("Failed to allocate vertex space"); } *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; } void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { const u32 upload_size = vertex_size * vertex_count; s_stats.buffer_streamed += upload_size; m_vertex_buffer.CommitMemory(upload_size); } void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) { const u32 req_size = sizeof(DrawIndex) * index_count; if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) { SubmitCommandListAndRestartRenderPass("out of index space"); if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) Panic("Failed to allocate 
index space"); } *map_ptr = reinterpret_cast(m_index_buffer.GetCurrentHostPointer()); *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); } void D3D12Device::UnmapIndexBuffer(u32 used_index_count) { const u32 upload_size = sizeof(DrawIndex) * used_index_count; s_stats.buffer_streamed += upload_size; m_index_buffer.CommitMemory(upload_size); } void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) { static constexpr std::array(GPUPipeline::Layout::MaxCount)> push_parameters = { 0, // SingleTextureAndUBO 2, // SingleTextureAndPushConstants 1, // SingleTextureBufferAndPushConstants 0, // MultiTextureAndUBO 2, // MultiTextureAndPushConstants 2, // ComputeSingleTextureAndPushConstants }; DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); if (m_dirty_flags & DIRTY_FLAG_PIPELINE_LAYOUT) { m_dirty_flags &= ~DIRTY_FLAG_PIPELINE_LAYOUT; UpdateRootSignature(); } s_stats.buffer_streamed += data_size; const u32 push_param = push_parameters[static_cast(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature()); ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); if (!IsUsingComputeRootSignature()) cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0); else cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0); } void* D3D12Device::MapUniformBuffer(u32 size) { const u32 used_space = Common::AlignUpPow2(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) { SubmitCommandListAndRestartRenderPass("out of uniform space"); if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) Panic("Failed to allocate uniform space."); } return m_uniform_buffer.GetCurrentHostPointer(); } void D3D12Device::UnmapUniformBuffer(u32 size) { 
s_stats.buffer_streamed += size;
  // Remember where this upload starts before committing moves the buffer offset.
  m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset();
  m_uniform_buffer.CommitMemory(size);
  m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER;
}

/// Builds one root signature per pipeline layout.
/// The graphics layouts are built for index 0 and, when raster order views are supported, again for index 1 with an
/// additional UAV descriptor table. The compute layout is only built for index 0.
/// The order in which parameters are added determines their root parameter index.
/// @return false (with error set by the builder) as soon as one signature fails to create.
bool D3D12Device::CreateRootSignatures(Error* error)
{
  D3D12::RootSignatureBuilder rsb;

  for (u32 rov = 0; rov < 2; rov++)
  {
    // Skip the ROV variants entirely when the feature is unavailable.
    if (rov && !m_features.raster_order_views)
      break;

    {
      // Parameters: SRV table, sampler table, CBV, [UAV table].
      auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::SingleTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
    }

    {
      // Parameters: SRV table, sampler table, [UAV table], 32-bit constants.
      auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
    }

    {
      // Parameters: SRV table, [UAV table], 32-bit constants. No sampler table.
      // NOTE(review): the debug name below says "+ UBO" although this layout uses push constants.
      auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
    }

    {
      // Parameters: SRV table (MAX_TEXTURE_SAMPLERS), sampler table (MAX_TEXTURE_SAMPLERS), CBV, [UAV table].
      auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::MultiTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
    }

    {
      // Parameters: SRV table, sampler table, [UAV table], 32-bit constants.
      auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
    }
  }

  {
    // Compute: no input assembler flag, everything visible to all stages.
    // Parameters: SRV table, sampler table, UAV table, 32-bit constants.
    auto& rs = m_root_signatures[0][static_cast(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];

    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                           D3D12_SHADER_VISIBILITY_ALL);
    rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                           D3D12_SHADER_VISIBILITY_ALL);
    rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
    if (!(rs = rsb.Create(error, true)))
      return false;
    D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
  }

  return true;
}

/// Releases every root signature created by CreateRootSignatures().
void D3D12Device::DestroyRootSignatures()
{
  m_root_signatures.enumerate([](auto& it) { it.Reset(); });
}

/// Binds a new set of colour targets and an optional depth target for subsequent render passes.
/// ColorFeedbackLoop and SampleDepthBuffer flags are not accepted (asserted in debug builds).
void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
                                   GPUPipeline::RenderPassFlag flags)
{
  DebugAssert(
    !(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));

  const bool image_bind_changed = ((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages);
  bool changed =
    (m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
  bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
  bool needs_rt_clear = false;

  // NOTE(review): this unconditionally ends the active render pass, so the InRenderPass() checks further down can
  // no longer be true, which is at odds with the "unless it's necessary" comment below - confirm intent.
  if (InRenderPass())
    EndRenderPass();

  m_current_depth_target = static_cast(ds);
  for (u32 i = 0; i < num_rts; i++)
  {
    D3D12Texture* const RT = static_cast(rts[i]);
    changed |= m_current_render_targets[i] != RT;
    m_current_render_targets[i] = RT;
    needs_rt_clear |= RT->IsClearedOrInvalidated();
  }
  // Null out slots that were bound before but are beyond the new count.
  for (u32 i = num_rts; i < m_num_current_render_targets; i++)
    m_current_render_targets[i] = nullptr;
  m_num_current_render_targets = Truncate8(num_rts);
  m_current_render_pass_flags = flags;

  // Don't end render pass unless it's necessary.
  if (changed)
  {
    if (InRenderPass())
      EndRenderPass();

    // Need a root signature change if switching to UAVs.
    m_dirty_flags |= image_bind_changed ? LAYOUT_DEPENDENT_DIRTY_STATE : 0;
    m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ?
(m_dirty_flags | DIRTY_FLAG_RT_UAVS) :
                      (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS);
  }
  else if (needs_rt_clear || needs_ds_clear)
  {
    if (InRenderPass())
      EndRenderPass();
  }
}

/// Opens a D3D12 render pass for the currently bound targets.
/// With colour/depth targets bound, each target is transitioned to its writable state and its pending
/// clear/invalidate is folded into the pass's beginning access. With nothing bound, the pass re-targets the current
/// swap chain buffer and preserves its contents.
/// Must not be called while a render pass is already active.
void D3D12Device::BeginRenderPass()
{
  DebugAssert(!InRenderPass());

  std::array rt_desc;
  D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc;

  // These stay null/zero unless the corresponding attachments are set up below.
  D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
  D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
  u32 num_rt_descs = 0;

  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
  {
    if (!IsUsingROVRootSignature()) [[likely]]
    {
      for (u32 i = 0; i < m_num_current_render_targets; i++)
      {
        D3D12Texture* const rt = m_current_render_targets[i];
        rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
        rt->SetUseFenceValue(GetCurrentFenceValue());

        D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
        desc.cpuDescriptor = rt->GetWriteDescriptor();
        desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;

        // Map the texture's tracked state to the pass's beginning access; afterwards the texture is Dirty.
        switch (rt->GetState())
        {
          case GPUTexture::State::Cleared:
          {
            desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
            std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
                        sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
            rt->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Invalidated:
          {
            desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
            rt->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Dirty:
          {
            desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
          }
          break;

          default:
            UnreachableCode();
            break;
        }
      }

      rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
      num_rt_descs = m_num_current_render_targets;
    }
    else
    {
      // Still need to clear the RTs.
      // In this path the targets are put in the unordered access state and are not passed to BeginRenderPass(),
      // so their pending clears are committed explicitly.
      for (u32 i = 0; i < m_num_current_render_targets; i++)
      {
        D3D12Texture* const rt = m_current_render_targets[i];
        rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        rt->SetUseFenceValue(GetCurrentFenceValue());
        rt->CommitClear(cmdlist);
        rt->SetState(GPUTexture::State::Dirty);
      }
    }

    if (m_current_depth_target)
    {
      D3D12Texture* const ds = m_current_depth_target;
      ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
      ds->SetUseFenceValue(GetCurrentFenceValue());
      ds_desc_p = &ds_desc;
      ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
      ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
      ds_desc.StencilBeginningAccess = {};
      ds_desc.StencilEndingAccess = {};

      // Same state-to-access mapping as for colour targets, applied to the depth plane.
      switch (ds->GetState())
      {
        case GPUTexture::State::Cleared:
        {
          ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
          ds_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = ds->GetClearDepth();
          ds->SetState(GPUTexture::State::Dirty);
        }
        break;

        case GPUTexture::State::Invalidated:
        {
          ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
          ds->SetState(GPUTexture::State::Dirty);
        }
        break;

        case GPUTexture::State::Dirty:
        {
          ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
        }
        break;

        default:
          UnreachableCode();
          break;
      }

      // Redundant with the assignment at the top of this branch.
      ds_desc_p = &ds_desc;
    }
  }
  else
  {
    // Re-rendering to swap chain.
    // Relies on m_current_swap_chain having been set by BeginPresent(); it is dereferenced without a null check.
    const auto& swap_chain_buf = m_current_swap_chain->GetCurrentBuffer();
    rt_desc[0] = {swap_chain_buf.second,
                  {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
                  {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
    rt_desc_p = &rt_desc[0];
    num_rt_descs = 1;
  }

  // All textures should be in shader read only optimal already, but just in case..
  const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 i = 0; i < num_textures; i++)
  {
    if (m_current_textures[i])
      m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature());
  cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);

  // TODO: Stats
  m_in_render_pass = true;
  s_stats.num_render_passes++;

  // If this is a new command buffer, bind the pipeline and such.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    SetInitialPipelineState();
}

/// Returns whether a render pass is currently open on the command list.
bool D3D12Device::InRenderPass()
{
  return m_in_render_pass;
}

/// Closes the active render pass. Must only be called while one is open (asserted in debug builds).
void D3D12Device::EndRenderPass()
{
  DebugAssert(m_in_render_pass);

  // TODO: stats
  m_in_render_pass = false;

  GetCommandList()->EndRenderPass();
}

/// Makes pipeline the current pipeline, re-issuing only the command list state that differs from what is cached
/// (topology, vertex stride, blend constant, layout).
void D3D12Device::SetPipeline(GPUPipeline* pipeline)
{
  // First draw? Bind everything.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
  {
    m_current_pipeline = static_cast(pipeline);
    if (!m_current_pipeline)
      return;

    SetInitialPipelineState();
    return;
  }
  else if (m_current_pipeline == pipeline)
  {
    return;
  }

  m_current_pipeline = static_cast(pipeline);

  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
  cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());

  if (D3D12_PRIMITIVE_TOPOLOGY topology = m_current_pipeline->GetTopology(); topology != m_current_topology)
  {
    m_current_topology = topology;
    cmdlist->IASetPrimitiveTopology(topology);
  }

  // A stride of zero leaves the cached stride and the bound vertex buffer view untouched.
  if (u32 vertex_stride = m_current_pipeline->GetVertexStride();
      vertex_stride > 0 && m_current_vertex_stride != vertex_stride)
  {
    m_current_vertex_stride = vertex_stride;
    SetVertexBuffer(cmdlist);
  }

  // TODO: we don't need to change the blend constant if blending isn't on.
if (u32 blend_constants = m_current_pipeline->GetBlendConstants(); m_current_blend_constant != blend_constants) { m_current_blend_constant = blend_constants; cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data()); } if (GPUPipeline::Layout layout = m_current_pipeline->GetLayout(); m_current_pipeline_layout != layout) { m_current_pipeline_layout = layout; m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS); } } void D3D12Device::UnbindPipeline(D3D12Pipeline* pl) { if (m_current_pipeline != pl) return; m_current_pipeline = nullptr; } bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const { for (u32 i = 0; i < m_num_current_render_targets; i++) { if (m_current_render_targets[i] == tex) return true; } return false; } void D3D12Device::InvalidateCachedState() { m_dirty_flags = ALL_DIRTY_STATE & ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS); m_in_render_pass = false; m_current_pipeline = nullptr; m_current_vertex_stride = 0; m_current_blend_constant = 0; m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; } void D3D12Device::SetInitialPipelineState() { DebugAssert(m_current_pipeline); m_dirty_flags &= ~DIRTY_FLAG_INITIAL; ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); m_current_vertex_stride = m_current_pipeline->GetVertexStride(); SetVertexBuffer(cmdlist); const D3D12_INDEX_BUFFER_VIEW ib_view = {m_index_buffer.GetGPUPointer(), m_index_buffer.GetSize(), DXGI_FORMAT_R16_UINT}; cmdlist->IASetIndexBuffer(&ib_view); cmdlist->SetPipelineState(m_current_pipeline->GetPipeline()); m_current_pipeline_layout = m_current_pipeline->GetLayout(); m_current_topology = m_current_pipeline->GetTopology(); cmdlist->IASetPrimitiveTopology(m_current_topology); m_current_blend_constant = m_current_pipeline->GetBlendConstants(); cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data()); SetViewport(cmdlist); 
SetScissor(cmdlist); } void D3D12Device::SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist) { const D3D12_VERTEX_BUFFER_VIEW vb_view = {m_vertex_buffer.GetGPUPointer(), m_vertex_buffer.GetSize(), m_current_vertex_stride}; cmdlist->IASetVertexBuffers(0, 1, &vb_view); } void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist) { const D3D12_VIEWPORT vp = {static_cast(m_current_viewport.left), static_cast(m_current_viewport.top), static_cast(m_current_viewport.width()), static_cast(m_current_viewport.height()), 0.0f, 1.0f}; cmdlist->RSSetViewports(1, &vp); } void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist) { static_assert(sizeof(GSVector4i) == sizeof(D3D12_RECT)); cmdlist->RSSetScissorRects(1, reinterpret_cast(&m_current_scissor)); } void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) { D3D12Texture* T = static_cast(texture); if (m_current_textures[slot] != T) { m_current_textures[slot] = T; m_dirty_flags |= DIRTY_FLAG_TEXTURES; if (T) { T->CommitClear(); T->SetUseFenceValue(GetCurrentFenceValue()); if (T->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) { if (InRenderPass()) EndRenderPass(); T->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } } } const D3D12DescriptorHandle& handle = static_cast(sampler ? 
sampler : m_nearest_sampler)->GetDescriptor(); if (m_current_samplers[slot] != handle) { m_current_samplers[slot] = handle; m_dirty_flags |= DIRTY_FLAG_SAMPLERS; } } void D3D12Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) { DebugAssert(slot == 0); if (m_current_texture_buffer == buffer) return; m_current_texture_buffer = static_cast(buffer); if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) m_dirty_flags |= DIRTY_FLAG_TEXTURES; } void D3D12Device::UnbindTexture(D3D12Texture* tex) { for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { if (m_current_textures[i] == tex) { m_current_textures[i] = nullptr; m_dirty_flags |= DIRTY_FLAG_TEXTURES; } } if (tex->IsRenderTarget() || tex->HasFlag(GPUTexture::Flags::AllowBindAsImage)) { for (u32 i = 0; i < m_num_current_render_targets; i++) { if (m_current_render_targets[i] == tex) { if (InRenderPass()) EndRenderPass(); m_current_render_targets[i] = nullptr; } } } else if (tex->IsDepthStencil()) { if (m_current_depth_target == tex) { if (InRenderPass()) EndRenderPass(); m_current_depth_target = nullptr; } } } void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf) { if (m_current_texture_buffer != buf) return; m_current_texture_buffer = nullptr; if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) m_dirty_flags |= DIRTY_FLAG_TEXTURES; } void D3D12Device::RenderTextureMipmap(D3D12Texture* texture, u32 dst_level, u32 dst_width, u32 dst_height, u32 src_level, u32 src_width, u32 src_height) { ID3D12RootSignature* rootsig = m_root_signatures[0][static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)].Get(); ComPtr& pipeline = m_mipmap_render_pipelines[static_cast(texture->GetFormat())]; if (!pipeline) { D3D12::GraphicsPipelineBuilder gpb; gpb.SetRootSignature(rootsig); gpb.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); gpb.SetRenderTarget(0, texture->GetDXGIFormat()); gpb.SetVertexShader(s_mipmap_blit_vs, 
std::size(s_mipmap_blit_vs)); gpb.SetPixelShader(s_mipmap_blit_ps, std::size(s_mipmap_blit_ps)); gpb.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); gpb.SetBlendState(0, false, D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); const std::wstring name = StringUtil::UTF8StringToWideString( TinyString::from_format("MipmapRender-{}", GPUTexture::GetFormatName(texture->GetFormat()))); Error error; if (m_pipeline_library) { HRESULT hr = m_pipeline_library->LoadGraphicsPipeline(name.c_str(), gpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf())); if (FAILED(hr)) { // E_INVALIDARG = not found. if (hr != E_INVALIDARG) ERROR_LOG("LoadGraphicsPipeline() failed with HRESULT {:08X}", static_cast(hr)); // Need to create it normally. pipeline = gpb.Create(m_device.Get(), &error, false); // Store if it wasn't an OOM or something else. if (pipeline && hr == E_INVALIDARG) { hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get()); if (FAILED(hr)) ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast(hr)); } } } else { pipeline = gpb.Create(m_device.Get(), &error, false); } if (!pipeline) { ERROR_LOG("Failed to compile mipmap render pipeline for {}: {}", GPUTexture::GetFormatName(texture->GetFormat()), error.GetDescription()); return; } } EndRenderPass(); // we need a temporary SRV and RTV for each mip level // Safe to use the init buffer after exec, because everything will be done with the texture. D3D12DescriptorHandle rtv_handle; while (!GetRTVHeapManager().Allocate(&rtv_handle)) SubmitCommandList(false, "Allocate RTV for RenderTextureMipmap()"); D3D12DescriptorHandle srv_handle; while (!GetDescriptorHeapManager().Allocate(&srv_handle)) SubmitCommandList(false, "Allocate SRV for RenderTextureMipmap()"); // Setup views. This will be a partial view for the SRV. 
const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {.Format = texture->GetDXGIFormat(), .ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D, .Texture2D = {.MipSlice = dst_level, .PlaneSlice = 0}}; m_device->CreateRenderTargetView(texture->GetResource(), &rtv_desc, rtv_handle); const D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { .Format = texture->GetDXGIFormat(), .ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D, .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, .Texture2D = {.MostDetailedMip = src_level, .MipLevels = 1, .PlaneSlice = 0, .ResourceMinLODClamp = 0.0f}}; m_device->CreateShaderResourceView(texture->GetResource(), &srv_desc, srv_handle); // *now* we don't have to worry about running out of anything. ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) { texture->TransitionSubresourceToState(cmdlist, src_level, texture->GetResourceState(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET) { texture->TransitionSubresourceToState(cmdlist, dst_level, texture->GetResourceState(), D3D12_RESOURCE_STATE_RENDER_TARGET); } const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = { .cpuDescriptor = rtv_handle, .BeginningAccess = {.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, .Clear = {}}, .EndingAccess = {.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, .Resolve = {}}}; cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE); const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast(dst_width), static_cast(dst_height), 0.0f, 1.0f}; cmdlist->RSSetViewports(1, &vp); const D3D12_RECT scissor = {0, 0, static_cast(dst_width), static_cast(dst_height)}; cmdlist->RSSetScissorRects(1, &scissor); cmdlist->SetPipelineState(pipeline.Get()); cmdlist->SetGraphicsRootDescriptorTable(0, srv_handle); cmdlist->SetGraphicsRootDescriptorTable(1, static_cast(m_linear_sampler)->GetDescriptor()); 
cmdlist->DrawInstanced(3, 1, 0, 0); cmdlist->EndRenderPass(); if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) { texture->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, texture->GetResourceState()); } if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET) { texture->TransitionSubresourceToState(cmdlist, dst_level, D3D12_RESOURCE_STATE_RENDER_TARGET, texture->GetResourceState()); } // Must destroy after current cmdlist. DeferDescriptorDestruction(m_descriptor_heap_manager, &srv_handle); DeferDescriptorDestruction(m_rtv_heap_manager, &rtv_handle); // Restore for next normal draw. SetViewport(GetCommandList()); SetScissor(GetCommandList()); m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE; } void D3D12Device::SetViewport(const GSVector4i rc) { if (m_current_viewport.eq(rc)) return; m_current_viewport = rc; if (m_dirty_flags & DIRTY_FLAG_INITIAL) return; SetViewport(GetCommandList()); } void D3D12Device::SetScissor(const GSVector4i rc) { if (m_current_scissor.eq(rc)) return; m_current_scissor = rc; if (m_dirty_flags & DIRTY_FLAG_INITIAL) return; SetScissor(GetCommandList()); } void D3D12Device::PreDrawCheck() { // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants. DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); const u32 dirty = std::exchange(m_dirty_flags, 0); if (dirty != 0) { if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) { UpdateRootSignature(); if (!UpdateRootParameters(dirty)) { SubmitCommandListAndRestartRenderPass("out of descriptors"); PreDrawCheck(); return; } } else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS)) { if (!UpdateRootParameters(dirty)) { SubmitCommandListAndRestartRenderPass("out of descriptors"); PreDrawCheck(); return; } } } if (!InRenderPass()) BeginRenderPass(); } void D3D12Device::PreDispatchCheck() { if (InRenderPass()) EndRenderPass(); // Transition images. 
ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); // All textures should be in shader read only optimal already, but just in case.. const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); for (u32 i = 0; i < num_textures; i++) { if (m_current_textures[i]) m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) { // Still need to clear the RTs. for (u32 i = 0; i < m_num_current_render_targets; i++) { D3D12Texture* const rt = m_current_render_targets[i]; rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); rt->SetUseFenceValue(GetCurrentFenceValue()); rt->CommitClear(cmdlist); rt->SetState(GPUTexture::State::Dirty); } } // If this is a new command buffer, bind the pipeline and such. if (m_dirty_flags & DIRTY_FLAG_INITIAL) SetInitialPipelineState(); // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants. 
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); const u32 dirty = std::exchange(m_dirty_flags, 0); if (dirty != 0) { if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) { UpdateRootSignature(); if (!UpdateRootParameters(dirty)) { SubmitCommandList(false, "out of descriptors"); PreDispatchCheck(); return; } } else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS)) { if (!UpdateRootParameters(dirty)) { SubmitCommandList(false, "out of descriptors"); PreDispatchCheck(); return; } } } } bool D3D12Device::IsUsingROVRootSignature() const { return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0); } bool D3D12Device::IsUsingComputeRootSignature() const { return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants); } void D3D12Device::UpdateRootSignature() { ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); if (!IsUsingComputeRootSignature()) { cmdlist->SetGraphicsRootSignature( m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast(m_current_pipeline_layout)].Get()); } else { cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast(m_current_pipeline_layout)].Get()); } } template bool D3D12Device::UpdateParametersForLayout(u32 dirty) { ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) { if (dirty & DIRTY_FLAG_CONSTANT_BUFFER) cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position); } constexpr u32 num_textures = GetActiveTexturesForLayout(layout); if (dirty & DIRTY_FLAG_TEXTURES && num_textures > 0) { D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator; D3D12DescriptorHandle gpu_handle; if (!allocator.Allocate(num_textures, &gpu_handle)) return false; if constexpr (num_textures == 1) { 
m_device->CopyDescriptorsSimple( 1, gpu_handle, m_current_textures[0] ? m_current_textures[0]->GetSRVDescriptor() : m_null_srv_descriptor, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } else { D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_TEXTURE_SAMPLERS]; UINT src_sizes[MAX_TEXTURE_SAMPLERS]; for (u32 i = 0; i < num_textures; i++) { src_handles[i] = m_current_textures[i] ? m_current_textures[i]->GetSRVDescriptor() : m_null_srv_descriptor; src_sizes[i] = 1; } m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &num_textures, num_textures, src_handles, src_sizes, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); else cmdlist->SetComputeRootDescriptorTable(0, gpu_handle); } if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0) { auto& allocator = m_command_lists[m_current_command_list].sampler_allocator; D3D12DescriptorHandle gpu_handle; if constexpr (num_textures == 1) { if (!allocator.LookupSingle(m_device.Get(), &gpu_handle, m_current_samplers[0])) return false; } else { if (!allocator.LookupGroup(m_device.Get(), &gpu_handle, m_current_samplers.data())) return false; } if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); else cmdlist->SetComputeRootDescriptorTable(1, gpu_handle); } if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) { D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator; D3D12DescriptorHandle gpu_handle; if (!allocator.Allocate(1, &gpu_handle)) return false; m_device->CopyDescriptorsSimple( 1, gpu_handle, m_current_texture_buffer ? 
m_current_texture_buffer->GetDescriptor() : m_null_srv_descriptor, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); } if (dirty & DIRTY_FLAG_RT_UAVS) { DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages); D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator; D3D12DescriptorHandle gpu_handle; if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle)) return false; D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS]; UINT src_sizes[MAX_IMAGE_RENDER_TARGETS]; const UINT dst_size = MAX_IMAGE_RENDER_TARGETS; for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) { src_handles[i] = m_current_render_targets[i] ? m_current_render_targets[i]->GetSRVDescriptor() : m_null_srv_descriptor; src_sizes[i] = 1; } m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); constexpr u32 rov_param = (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ? 1 : ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 
3 : 2); if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); else cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle); } return true; } bool D3D12Device::UpdateRootParameters(u32 dirty) { switch (m_current_pipeline_layout) { case GPUPipeline::Layout::SingleTextureAndUBO: return UpdateParametersForLayout(dirty); case GPUPipeline::Layout::SingleTextureAndPushConstants: return UpdateParametersForLayout(dirty); case GPUPipeline::Layout::SingleTextureBufferAndPushConstants: return UpdateParametersForLayout(dirty); case GPUPipeline::Layout::MultiTextureAndUBO: return UpdateParametersForLayout(dirty); case GPUPipeline::Layout::MultiTextureAndPushConstants: return UpdateParametersForLayout(dirty); case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants: return UpdateParametersForLayout(dirty); default: UnreachableCode(); } } void D3D12Device::Draw(u32 vertex_count, u32 base_vertex) { PreDrawCheck(); s_stats.num_draws++; GetCommandList()->DrawInstanced(vertex_count, 1, base_vertex, 0); } void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { PreDrawCheck(); s_stats.num_draws++; GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0); } void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) { Panic("Barriers are not supported"); } void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y, u32 group_size_z) { PreDispatchCheck(); s_stats.num_draws++; const u32 groups_x = threads_x / group_size_x; const u32 groups_y = threads_y / group_size_y; const u32 groups_z = threads_z / group_size_z; GetCommandList()->Dispatch(groups_x, groups_y, groups_z); }