From 4506ff1c469b25645998feeb9f581295d97c308b Mon Sep 17 00:00:00 2001 From: TheLastRar Date: Wed, 7 Jan 2026 00:54:08 +0000 Subject: [PATCH] GS/DX12: Use Enhanced Barriers API --- pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.cpp | 13 +- pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 427 +++++++++---- pcsx2/GS/Renderers/DX12/GSDevice12.h | 29 +- pcsx2/GS/Renderers/DX12/GSTexture12.cpp | 593 ++++++++++++++---- pcsx2/GS/Renderers/DX12/GSTexture12.h | 42 +- 5 files changed, 842 insertions(+), 262 deletions(-) diff --git a/pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.cpp b/pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.cpp index 7ce28716ca..4713751066 100644 --- a/pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.cpp +++ b/pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.cpp @@ -22,8 +22,8 @@ D3D12StreamBuffer::~D3D12StreamBuffer() bool D3D12StreamBuffer::Create(u32 size) { - const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, - {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}; + const GSDevice12::D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, + {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}}; D3D12MA::ALLOCATION_DESC allocationDesc = {}; allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED; @@ -31,8 +31,13 @@ bool D3D12StreamBuffer::Create(u32 size) wil::com_ptr_nothrow buffer; wil::com_ptr_nothrow allocation; - HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put())); + HRESULT hr; + if (GSDevice12::GetInstance()->UseEnhancedBarriers()) + hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource3(&allocationDesc, &resource_desc.desc1, + D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put())); + else + hr = 
GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc.desc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put())); pxAssertMsg(SUCCEEDED(hr), "Allocate buffer"); if (FAILED(hr)) return false; diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 48efb0cbc9..c24a89be43 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -19,6 +19,8 @@ #include "common/ScopedGuard.h" #include "common/SmallString.h" #include "common/StringUtil.h" +#include "common/FileSystem.h" +#include "common/Path.h" #include "D3D12MemAlloc.h" #include "imgui.h" @@ -158,6 +160,83 @@ u32 GSDevice12::GetAdapterVendorID() const return desc.VendorId; } +uint SDKVersion(const std::string& path) +{ + // The Agility SDK version is embedded as the minor file version. + // This is only true for the redist files, not the OS files. + // Alternatively, D3D12Core also exports its D3D12SDKVersion. 
+ std::wstring wpath = FileSystem::GetWin32Path(path); + + const DWORD size = GetFileVersionInfoSizeW(wpath.c_str(), nullptr); + if (size == 0) + return 0; + + BOOL ret; + + std::vector info_data(size); + ret = GetFileVersionInfoW(wpath.c_str(), 0, size, info_data.data()); + if (!ret) + return 0; + + uint len; + VS_FIXEDFILEINFO* file_ver; + ret = VerQueryValueW(info_data.data(), L"\\", reinterpret_cast(&file_ver), &len); + if (!ret) + return 0; + + return file_ver->dwFileVersionMS & 0xFFFF; +} + +void GSDevice12::LoadAgilitySDK() +{ + static bool agility_loaded = false; + if (agility_loaded) + return; + + HRESULT hr; + + // See https://microsoft.github.io/DirectX-Specs/d3d/IndependentDevices.html + ComPtr sdk_configuration; + hr = D3D12GetInterface(CLSID_D3D12SDKConfiguration, IID_PPV_ARGS(sdk_configuration.put())); + if (FAILED(hr)) + { + Console.Error("D3D12: Agility SDK configuration is not available"); + return; + } + + std::string sdk_path = Path::Combine(Path::GetDirectory(FileSystem::GetProgramPath()), "\\D3D12\\"); + std::string core_path = Path::Combine(sdk_path, "D3D12Core.dll"); + if (!FileSystem::FileExists(core_path.c_str())) + return; + + const uint agility_version = SDKVersion(core_path); + if (agility_version == 0) + return; + + ComPtr device_factory; + // CreateDeviceFactory seems to use a utf8 string for the path. + // If the system has a newer SDK, then the system SDK seems to be returned instead. + hr = sdk_configuration->CreateDeviceFactory(agility_version, + StringUtil::WideStringToUTF8String(FileSystem::GetWin32Path(sdk_path)).c_str(), IID_PPV_ARGS(device_factory.put())); + if (FAILED(hr)) + { + Console.ErrorFmt("D3D12: Unable to load provided Agility SDK {:08X}", hr); + return; + } + + // Windows 10 (and older drivers on 11) will apply to the global state in ID3D12DeviceFactory::CreateDevice(). + // To get consistent behaviour across all systems, always apply the global state. 
+ // This also allows us to use the normal D3D12*() methods with the loaded agility SDK. + hr = device_factory->ApplyToGlobalState(); + if (FAILED(hr)) + { + Console.ErrorFmt("D3D12: Unable to apply provided Agility SDK {:08X}", hr); + return; + } + + agility_loaded = true; +} + bool GSDevice12::CreateDevice(u32& vendor_id) { bool enable_debug_layer = GSConfig.UseDebugDevice; @@ -171,6 +250,9 @@ bool GSDevice12::CreateDevice(u32& vendor_id) HRESULT hr; + // Load the Agility SDK + LoadAgilitySDK(); + // Enabling the debug layer will fail if the Graphics Tools feature is not installed. if (enable_debug_layer) { @@ -191,6 +273,7 @@ bool GSDevice12::CreateDevice(u32& vendor_id) // Intel Haswell doesn't actually support DX12 even tho the device is created which results in a crash, // to get around this check if device can be created using feature level 12 (skylake+). const bool isIntel = (vendor_id == 0x163C || vendor_id == 0x8086 || vendor_id == 0x8087); + // Create the actual device. hr = D3D12CreateDevice(m_adapter.get(), isIntel ? 
D3D_FEATURE_LEVEL_12_0 : D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); if (FAILED(hr)) @@ -206,6 +289,14 @@ bool GSDevice12::CreateDevice(u32& vendor_id) Console.Error("D3D12: Failed to get lookup adapter by device LUID"); } + ComPtr config = m_device.try_query(); + int sdkVersion = 0; + if (config) + { + sdkVersion = config->GetDesc().SDKVersion; + Console.WriteLnFmt("D3D12: Agility version: {}", sdkVersion); + } + if (enable_debug_layer) { ComPtr info_queue = m_device.try_query(); @@ -218,14 +309,18 @@ bool GSDevice12::CreateDevice(u32& vendor_id) } D3D12_INFO_QUEUE_FILTER filter = {}; - std::array id_list{ + std::array id_list{ D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, + // The current OS version of D3D12 (616) has a validation bug. + // This is fixed with Agility 1.618.4. + // For now, disable this warning until the OS updates. + D3D12_MESSAGE_ID_INCOMPATIBLE_BARRIER_LAYOUT, }; - filter.DenyList.NumIDs = static_cast(id_list.size()); + filter.DenyList.NumIDs = static_cast(sdkVersion < 618 ? 
id_list.size() : id_list.size() - 1); filter.DenyList.pIDList = id_list.data(); info_queue->PushStorageFilter(&filter); } @@ -318,8 +413,17 @@ bool GSDevice12::CreateCommandLists() if (FAILED(hr)) return false; - hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(), - nullptr, IID_PPV_ARGS(res.command_lists[i].put())); + if (m_enhanced_barriers) + { + hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(), + nullptr, IID_PPV_ARGS(res.command_lists[i].list7.put())); + res.command_lists[i].list4 = res.command_lists[i].list7; + } + else + { + hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(), + nullptr, IID_PPV_ARGS(res.command_lists[i].list4.put())); + } if (FAILED(hr)) { Console.Error("D3D12: Failed to create command list: %08X", hr); @@ -327,7 +431,7 @@ bool GSDevice12::CreateCommandLists() } // Close the command lists, since the first thing we do is reset them. - hr = res.command_lists[i]->Close(); + hr = res.command_lists[i].list4->Close(); pxAssertRel(SUCCEEDED(hr), "Closing new command list failed"); if (FAILED(hr)) return false; @@ -365,7 +469,7 @@ void GSDevice12::MoveToNextCommandList() // Begin command list. 
res.command_allocators[1]->Reset(); - res.command_lists[1]->Reset(res.command_allocators[1].get(), nullptr); + res.command_lists[1].list4->Reset(res.command_allocators[1].get(), nullptr); res.descriptor_allocator.Reset(); if (res.sampler_allocator.ShouldReset()) res.sampler_allocator.Reset(); @@ -397,18 +501,18 @@ void GSDevice12::MoveToNextCommandList() res.has_timestamp_query = m_gpu_timing_enabled; if (m_gpu_timing_enabled) { - res.command_lists[1]->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, + res.command_lists[1].list4->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST); } ID3D12DescriptorHeap* heaps[2] = { res.descriptor_allocator.GetDescriptorHeap(), res.sampler_allocator.GetDescriptorHeap()}; - res.command_lists[1]->SetDescriptorHeaps(std::size(heaps), heaps); + res.command_lists[1].list4->SetDescriptorHeaps(std::size(heaps), heaps); m_allocator->SetCurrentFrameIndex(static_cast(m_current_fence_value)); } -ID3D12GraphicsCommandList4* GSDevice12::GetInitCommandList() +const D3D12CommandList& GSDevice12::GetInitCommandList() { CommandListResources& res = m_command_lists[m_current_command_list]; if (!res.init_command_list_used) @@ -416,12 +520,12 @@ ID3D12GraphicsCommandList4* GSDevice12::GetInitCommandList() [[maybe_unused]] HRESULT hr = res.command_allocators[0]->Reset(); pxAssertMsg(SUCCEEDED(hr), "Reset init command allocator failed"); - res.command_lists[0]->Reset(res.command_allocators[0].get(), nullptr); + res.command_lists[0].list4->Reset(res.command_allocators[0].get(), nullptr); pxAssertMsg(SUCCEEDED(hr), "Reset init command list failed"); res.init_command_list_used = true; } - return res.command_lists[0].get(); + return res.command_lists[0]; } bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion) @@ -432,16 +536,16 @@ bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion) if (res.has_timestamp_query) { // write the timestamp back 
at the end of the cmdlist - res.command_lists[1]->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, + res.command_lists[1].list4->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1); - res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, + res.command_lists[1].list4->ResolveQueryData(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.get(), m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); } if (res.init_command_list_used) { - hr = res.command_lists[0]->Close(); + hr = res.command_lists[0].list4->Close(); if (FAILED(hr)) { Console.Error("D3D12: Closing init command list failed with HRESULT %08X", hr); @@ -450,7 +554,7 @@ bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion) } // Close and queue command list. 
- hr = res.command_lists[1]->Close(); + hr = res.command_lists[1].list4->Close(); if (FAILED(hr)) { Console.Error("D3D12: Closing main command list failed with HRESULT %08X", hr); @@ -459,12 +563,12 @@ bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion) if (res.init_command_list_used) { - const std::array execute_lists{res.command_lists[0].get(), res.command_lists[1].get()}; + const std::array execute_lists{res.command_lists[0].list4.get(), res.command_lists[1].list4.get()}; m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); } else { - const std::array execute_lists{res.command_lists[1].get()}; + const std::array execute_lists{res.command_lists[1].list4.get()}; m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); } @@ -598,10 +702,14 @@ bool GSDevice12::CreateTimestampQuery() } const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK}; - const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, BUFFER_SIZE, 1, 1, 1, - DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}; - hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put())); + const D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, BUFFER_SIZE, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}}; + if (m_enhanced_barriers) + hr = m_allocator->CreateResource3(&allocation_desc, &resource_desc.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, + 0, nullptr, m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put())); + else + hr = m_allocator->CreateResource(&allocation_desc, &resource_desc.desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + m_timestamp_query_allocation.put(), 
IID_PPV_ARGS(m_timestamp_query_buffer.put())); if (FAILED(hr)) { Console.Error("D3D12: CreateResource() for timestamp failed with %08X", hr); @@ -638,15 +746,20 @@ bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_ { // Try to place the fixed index buffer in GPU local memory. // Use the staging buffer to copy into it. - const D3D12_RESOURCE_DESC rd = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}; + const D3D12_RESOURCE_DESCU rd = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}}; const D3D12MA::ALLOCATION_DESC cpu_ad = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD}; ComPtr cpu_buffer; ComPtr cpu_allocation; - HRESULT hr = m_allocator->CreateResource( - &cpu_ad, &rd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put())); + HRESULT hr; + if (m_enhanced_barriers) + hr = m_allocator->CreateResource3( + &cpu_ad, &rd.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put())); + else + hr = m_allocator->CreateResource( + &cpu_ad, &rd.desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put())); pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer"); if (FAILED(hr)) return false; @@ -662,21 +775,34 @@ bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_ cpu_buffer->Unmap(0, &write_range); const D3D12MA::ALLOCATION_DESC gpu_ad = {D3D12MA::ALLOCATION_FLAG_COMMITTED, D3D12_HEAP_TYPE_DEFAULT}; - - hr = m_allocator->CreateResource( - &gpu_ad, &rd, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer)); + if (m_enhanced_barriers) + hr = m_allocator->CreateResource3( + &gpu_ad, &rd.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, gpu_allocation, 
IID_PPV_ARGS(gpu_buffer)); + else + hr = m_allocator->CreateResource( + &gpu_ad, &rd.desc, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer)); pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer"); if (FAILED(hr)) return false; - GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size); + GetInitCommandList().list4->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size); - D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE}; - rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - rb.Transition.pResource = *gpu_buffer; - rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use. - rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER; - GetInitCommandList()->ResourceBarrier(1, &rb); + if (m_enhanced_barriers) + { + const D3D12_BUFFER_BARRIER barrier = {D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_INDEX_INPUT, + D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDEX_BUFFER, *gpu_buffer, 0, size}; + const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_BUFFER, .NumBarriers = 1, .pBufferBarriers = &barrier}; + GetInitCommandList().list7->Barrier(1, &group); + } + else + { + D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE}; + rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + rb.Transition.pResource = *gpu_buffer; + rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use. 
+ rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER; + GetInitCommandList().list4->ResourceBarrier(1, &rb); + } DeferResourceDestruction(cpu_allocation.get(), cpu_buffer.get()); return true; @@ -768,7 +894,7 @@ void GSDevice12::Destroy() { GSDevice::Destroy(); - if (GetCommandList()) + if (GetCommandList().list4) { EndRenderPass(); ExecuteCommandList(true); @@ -919,11 +1045,11 @@ bool GSDevice12::CreateSwapChain() // Render a frame as soon as possible to clear out whatever was previously being displayed. EndRenderPass(); GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get(); - ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + const D3D12CommandList& cmdlist = GetCommandList(); m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast(m_swap_chain_buffers.size())); - swap_chain_buf->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr); - swap_chain_buf->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PRESENT); + swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::RenderTarget); + cmdlist.list4->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr); + swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::Present); ExecuteCommandList(false); m_swap_chain->Present(0, m_using_allow_tearing ? 
DXGI_PRESENT_ALLOW_TEARING : 0); return true; @@ -950,7 +1076,7 @@ bool GSDevice12::CreateSwapChainRTV() std::unique_ptr tex = GSTexture12::Adopt(std::move(backbuffer), GSTexture::Type::RenderTarget, GSTexture::Format::Color, swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, 1, swap_chain_desc.BufferDesc.Format, DXGI_FORMAT_UNKNOWN, swap_chain_desc.BufferDesc.Format, - DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_STATE_COMMON); + DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, GSTexture12::ResourceState::Present); if (!tex) { m_swap_chain_buffers.clear(); @@ -1122,18 +1248,18 @@ GSDevice::PresentResult GSDevice12::BeginPresent(bool frame_skip) GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get(); - ID3D12GraphicsCommandList* cmdlist = GetCommandList(); - swap_chain_buf->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &swap_chain_buf->GetWriteDescriptor().cpu_handle, FALSE, nullptr); + const D3D12CommandList& cmdlist = GetCommandList(); + swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::RenderTarget); + cmdlist.list4->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr); + cmdlist.list4->OMSetRenderTargets(1, &swap_chain_buf->GetWriteDescriptor().cpu_handle, FALSE, nullptr); g_perfmon.Put(GSPerfMon::RenderPasses, 1); const D3D12_VIEWPORT vp{0.0f, 0.0f, static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height), 0.0f, 1.0f}; const D3D12_RECT scissor{ 0, 0, static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height)}; - cmdlist->RSSetViewports(1, &vp); - cmdlist->RSSetScissorRects(1, &scissor); + cmdlist.list4->RSSetViewports(1, &vp); + cmdlist.list4->RSSetScissorRects(1, &scissor); return PresentResult::OK; } @@ -1144,7 +1270,7 @@ 
void GSDevice12::EndPresent() GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get(); m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast(m_swap_chain_buffers.size())); - swap_chain_buf->TransitionToState(GetCommandList(), D3D12_RESOURCE_STATE_PRESENT); + swap_chain_buf->TransitionToState(GSTexture12::ResourceState::Present); if (!ExecuteCommandList(WaitType::None)) { m_device_lost = true; @@ -1187,7 +1313,7 @@ void GSDevice12::PushDebugGroup(const char* fmt, ...) const UINT color = Palette( ++s_debug_scope_depth, {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f}); - PIXBeginEvent(GetCommandList(), color, "%s", buf.c_str()); + PIXBeginEvent(GetCommandList().list4.get(), color, "%s", buf.c_str()); #endif } @@ -1199,7 +1325,7 @@ void GSDevice12::PopDebugGroup() s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u); - PIXEndEvent(GetCommandList()); + PIXEndEvent(GetCommandList().list4.get()); #endif } @@ -1230,7 +1356,7 @@ void GSDevice12::InsertDebugMessage(DebugMessageCategory category, const char* f static_cast(fcolor[1] * 255.0f), static_cast(fcolor[2] * 255.0f)); - PIXSetMarker(GetCommandList(), color, "%s", buf.c_str()); + PIXSetMarker(GetCommandList().list4.get(), color, "%s", buf.c_str()); #endif } @@ -1263,30 +1389,43 @@ bool GSDevice12::CheckFeatures(const u32& vendor_id) Console.WriteLnFmt("D3D12: Programmable Sample Position: {}", m_programmable_sample_positions ? 
"Supported" : "Not Supported"); BOOL allow_tearing_supported = false; - const HRESULT hr = m_dxgi_factory->CheckFeatureSupport( + HRESULT hr = m_dxgi_factory->CheckFeatureSupport( DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported)); m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE); + D3D12_FEATURE_DATA_D3D12_OPTIONS12 device_options12 = {}; + hr = m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &device_options12, sizeof(device_options12)); + if (SUCCEEDED(hr)) + { + Console.WriteLnFmt("D3D12: Enhanced Barriers: {}", device_options12.EnhancedBarriersSupported ? "Supported" : "Not Supported"); + m_enhanced_barriers = device_options12.EnhancedBarriersSupported; + } + else + { + Console.WriteLnFmt("D3D12: Failed to check for Enhanced Barriers: 0x{:08x}", static_cast(hr)); + m_enhanced_barriers = false; + } + return true; } void GSDevice12::DrawPrimitive() { g_perfmon.Put(GSPerfMon::DrawCalls, 1); - GetCommandList()->DrawInstanced(m_vertex.count, 1, m_vertex.start, 0); + GetCommandList().list4->DrawInstanced(m_vertex.count, 1, m_vertex.start, 0); } void GSDevice12::DrawIndexedPrimitive() { g_perfmon.Put(GSPerfMon::DrawCalls, 1); - GetCommandList()->DrawIndexedInstanced(m_index.count, 1, m_index.start, m_vertex.start, 0); + GetCommandList().list4->DrawIndexedInstanced(m_index.count, 1, m_index.start, m_vertex.start, 0); } void GSDevice12::DrawIndexedPrimitive(int offset, int count) { pxAssert(offset + count <= (int)m_index.count); g_perfmon.Put(GSPerfMon::DrawCalls, 1); - GetCommandList()->DrawIndexedInstanced(count, 1, m_index.start + offset, m_vertex.start, 0); + GetCommandList().list4->DrawIndexedInstanced(count, 1, m_index.start + offset, m_vertex.start, 0); } void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_format, DXGI_FORMAT* srv_format, @@ -1391,14 +1530,14 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, if 
(dTex12->GetType() != GSTexture::Type::DepthStencil) { - dTex12->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - GetCommandList()->ClearRenderTargetView( + dTex12->TransitionToState(GSTexture12::ResourceState::RenderTarget); + GetCommandList().list4->ClearRenderTargetView( dTex12->GetWriteDescriptor(), sTex12->GetUNormClearColor().v, 0, nullptr); } else { - dTex12->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE); - GetCommandList()->ClearDepthStencilView( + dTex12->TransitionToState(GSTexture12::ResourceState::DepthWriteStencil); + GetCommandList().list4->ClearDepthStencilView( dTex12->GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, sTex12->GetClearDepth(), 0, 0, nullptr); } @@ -1418,12 +1557,12 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, EndRenderPass(); - sTex12->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + sTex12->TransitionToState(GSTexture12::ResourceState::CopySrc); sTex12->SetUseFenceCounter(GetCurrentFenceValue()); if (m_tfx_textures[0] && sTex12->GetSRVDescriptor() == m_tfx_textures[0]) PSSetShaderResource(0, nullptr, false); - dTex12->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + dTex12->TransitionToState(GSTexture12::ResourceState::CopyDst); dTex12->SetUseFenceCounter(GetCurrentFenceValue()); D3D12_TEXTURE_COPY_LOCATION srcloc; @@ -1440,13 +1579,13 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, const bool full_rt_copy = src_dst_rect_match && ((sTex12->IsDepthStencil() && !m_programmable_sample_positions) || (destX == 0 && destY == 0 && r.eq(src_rect))); if (full_rt_copy) { - GetCommandList()->CopyResource(dTex12->GetResource(), sTex12->GetResource()); + GetCommandList().list4->CopyResource(dTex12->GetResource(), sTex12->GetResource()); } else { const D3D12_BOX srcbox{static_cast(r.left), static_cast(r.top), 0u, static_cast(r.right), static_cast(r.bottom), 1u}; - GetCommandList()->CopyTextureRegion(&dstloc, destX, destY, 0, &srcloc, &srcbox); + 
GetCommandList().list4->CopyTextureRegion(&dstloc, destX, destY, 0, &srcloc, &srcbox); } dTex12->SetState(GSTexture::State::Dirty); @@ -1560,10 +1699,10 @@ void GSDevice12::DrawMultiStretchRects( { GSTexture12* const stex = static_cast(rects[i].src); stex->CommitClear(); - if (stex->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + if (stex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource) { EndRenderPass(); - stex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + stex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } } @@ -1689,11 +1828,11 @@ void GSDevice12::BeginRenderPassForStretchRect( void GSDevice12::DoStretchRect(GSTexture12* sTex, const GSVector4& sRect, GSTexture12* dTex, const GSVector4& dRect, const ID3D12PipelineState* pipeline, bool linear, bool allow_discard) { - if (sTex->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + if (sTex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource) { // can't transition in a render pass EndRenderPass(); - sTex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + sTex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } SetUtilityRootSignature(); @@ -1770,14 +1909,14 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, if (has_input_0) { static_cast(sTex[0])->CommitClear(); - static_cast(sTex[0])->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(sTex[0])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } if (has_input_1) { static_cast(sTex[1])->CommitClear(); - static_cast(sTex[1])->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(sTex[1])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } - static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + static_cast(dTex)->TransitionToState(GSTexture12::ResourceState::RenderTarget); // 
Upload constant to select YUV algo, but skip constant buffer update if we don't need it if (feedback_write_2 || feedback_write_1 || sTex[0]) @@ -1830,7 +1969,7 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, if (sTex[0] == sTex[2]) { // need a barrier here because of the render pass - static_cast(sTex[2])->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(sTex[2])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } } @@ -1877,13 +2016,13 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, // this texture is going to get used as an input, so make sure we don't read undefined data static_cast(dTex)->CommitClear(); - static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } void GSDevice12::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb) { - static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + static_cast(dTex)->TransitionToState(GSTexture12::ResourceState::RenderTarget); const GSVector4i rc = GSVector4i(dRect); const GSVector4i dtex_rc = dTex->GetRect(); @@ -1899,7 +2038,7 @@ void GSDevice12::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* EndRenderPass(); // this texture is going to get used as an input, so make sure we don't read undefined data - static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } void GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) @@ -1918,7 +2057,7 @@ void GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float para DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize()); EndRenderPass(); - 
static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } void GSDevice12::DoFXAA(GSTexture* sTex, GSTexture* dTex) @@ -1936,7 +2075,7 @@ void GSDevice12::DoFXAA(GSTexture* sTex, GSTexture* dTex) DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize()); EndRenderPass(); - static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + static_cast(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } bool GSDevice12::CompileCASPipelines() @@ -2100,7 +2239,7 @@ void GSDevice12::RenderImGui() D3D12DescriptorHandle handle = m_null_texture->GetSRVDescriptor(); if (tex) { - tex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + tex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); handle = tex->GetSRVDescriptor(); } @@ -2118,7 +2257,7 @@ void GSDevice12::RenderImGui() if (ApplyUtilityState()) { - GetCommandList()->DrawIndexedInstanced( + GetCommandList().list4->DrawIndexedInstanced( pcmd->ElemCount, 1, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0); } } @@ -2147,23 +2286,23 @@ bool GSDevice12::DoCAS( } } - ID3D12GraphicsCommandList* const cmdlist = GetCommandList(); - const D3D12_RESOURCE_STATES old_state = sTex12->GetResourceState(); - sTex12->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - dTex12->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + const D3D12CommandList& cmdlist = GetCommandList(); + const GSTexture12::ResourceState old_state = sTex12->GetResourceState(); + sTex12->TransitionToState(cmdlist, GSTexture12::ResourceState::ComputeShaderResource); + dTex12->TransitionToState(cmdlist, GSTexture12::ResourceState::CASShaderUAV); - cmdlist->SetComputeRootSignature(m_cas_root_signature.get()); - cmdlist->SetComputeRoot32BitConstants( + cmdlist.list4->SetComputeRootSignature(m_cas_root_signature.get()); + 
cmdlist.list4->SetComputeRoot32BitConstants( CAS_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, NUM_CAS_CONSTANTS, constants.data(), 0); - cmdlist->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE, sTexDH); - cmdlist->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE, dTexDH); - cmdlist->SetPipelineState(sharpen_only ? m_cas_sharpen_pipeline.get() : m_cas_upscale_pipeline.get()); + cmdlist.list4->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE, sTexDH); + cmdlist.list4->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE, dTexDH); + cmdlist.list4->SetPipelineState(sharpen_only ? m_cas_sharpen_pipeline.get() : m_cas_upscale_pipeline.get()); m_dirty_flags |= DIRTY_FLAG_PIPELINE; static const int threadGroupWorkRegionDim = 16; const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - cmdlist->Dispatch(dispatchX, dispatchY, 1); + cmdlist.list4->Dispatch(dispatchX, dispatchY, 1); sTex12->TransitionToState(cmdlist, old_state); return true; @@ -2243,9 +2382,9 @@ void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector if (!InRenderPass()) { if (vkRt) - vkRt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); + vkRt->TransitionToState(GSTexture12::ResourceState::RenderTarget); if (vkDs) - vkDs->TransitionToState(depth_read ? (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) : D3D12_RESOURCE_STATE_DEPTH_WRITE); + vkDs->TransitionToState(depth_read ? GSTexture12::ResourceState::DepthReadStencil : GSTexture12::ResourceState::DepthWriteStencil); } // This is used to set/initialize the framebuffer for tfx rendering. 
@@ -2374,7 +2513,7 @@ bool GSDevice12::CreateNullTexture() if (!m_null_texture) return false; - m_null_texture->TransitionToState(GetCommandList(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_null_texture->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); D3D12::SetObjectName(m_null_texture->GetResource(), "Null texture"); return true; } @@ -3153,7 +3292,7 @@ void GSDevice12::ExecuteCommandListAndRestartRenderPass(bool wait_for_completion if (was_in_render_pass) { // rebind everything except RT, because the RP does that for us - ApplyBaseState(m_dirty_flags & ~DIRTY_FLAG_RENDER_TARGET, GetCommandList()); + ApplyBaseState(m_dirty_flags & ~DIRTY_FLAG_RENDER_TARGET, GetCommandList().list4.get()); m_dirty_flags &= ~DIRTY_BASE_STATE; // restart render pass @@ -3244,17 +3383,17 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state, boo GSTexture12* dtex = static_cast(sr); if (check_state) { - if (dtex->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE && InRenderPass()) + if (dtex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource && InRenderPass()) { GL_INS("Ending render pass due to resource transition"); EndRenderPass(); } dtex->CommitClear(); - dtex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + dtex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } dtex->SetUseFenceCounter(GetCurrentFenceValue()); - handle = feedback ? dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor(); + handle = (feedback && !m_enhanced_barriers) ? 
dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor(); } else { @@ -3285,7 +3424,7 @@ void GSDevice12::SetUtilityRootSignature() m_current_root_signature = RootSignature::Utility; m_dirty_flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_PIPELINE; - GetCommandList()->SetGraphicsRootSignature(m_utility_root_signature.get()); + GetCommandList().list4->SetGraphicsRootSignature(m_utility_root_signature.get()); } void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle& sampler) @@ -3295,7 +3434,7 @@ void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle& { GSTexture12* d12tex = static_cast(dtex); d12tex->CommitClear(); - d12tex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + d12tex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); d12tex->SetUseFenceCounter(GetCurrentFenceValue()); handle = d12tex->GetSRVDescriptor(); } @@ -3333,7 +3472,7 @@ void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle& void GSDevice12::SetUtilityPushConstants(const void* data, u32 size) { - GetCommandList()->SetGraphicsRoot32BitConstants( + GetCommandList().list4->SetGraphicsRoot32BitConstants( UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, (size + 3) / sizeof(u32), data, 0); } @@ -3396,13 +3535,13 @@ void GSDevice12::RenderTextureMipmap( } // *now* we don't have to worry about running out of anything. 
- ID3D12GraphicsCommandList* cmdlist = GetCommandList(); - if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + const D3D12CommandList& cmdlist = GetCommandList(); + if (texture->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource) texture->TransitionSubresourceToState( - cmdlist, src_level, texture->GetResourceState(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET) + cmdlist, src_level, texture->GetResourceState(), GSTexture12::ResourceState::PixelShaderResource); + if (texture->GetResourceState() != GSTexture12::ResourceState::RenderTarget) texture->TransitionSubresourceToState( - cmdlist, dst_level, texture->GetResourceState(), D3D12_RESOURCE_STATE_RENDER_TARGET); + cmdlist, dst_level, texture->GetResourceState(), GSTexture12::ResourceState::RenderTarget); // We set the state directly here. constexpr u32 MODIFIED_STATE = DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET; @@ -3410,14 +3549,14 @@ void GSDevice12::RenderTextureMipmap( // Using a render pass is probably a bit overkill. 
const D3D12_DISCARD_REGION discard_region = {0u, nullptr, dst_level, 1u}; - cmdlist->DiscardResource(texture->GetResource(), &discard_region); - cmdlist->OMSetRenderTargets(1, &rtv_handle.cpu_handle, FALSE, nullptr); + cmdlist.list4->DiscardResource(texture->GetResource(), &discard_region); + cmdlist.list4->OMSetRenderTargets(1, &rtv_handle.cpu_handle, FALSE, nullptr); const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast(dst_width), static_cast(dst_height), 0.0f, 1.0f}; - cmdlist->RSSetViewports(1, &vp); + cmdlist.list4->RSSetViewports(1, &vp); const D3D12_RECT scissor = {0, 0, static_cast(dst_width), static_cast(dst_height)}; - cmdlist->RSSetScissorRects(1, &scissor); + cmdlist.list4->RSSetScissorRects(1, &scissor); SetUtilityRootSignature(); SetPipeline(m_convert[static_cast(ShaderConvert::COPY)].get()); @@ -3425,12 +3564,12 @@ void GSDevice12::RenderTextureMipmap( GSVector4(0.0f, 0.0f, static_cast(dst_width), static_cast(dst_height)), GSVector2i(dst_width, dst_height)); - if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + if (texture->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource) texture->TransitionSubresourceToState( - cmdlist, src_level, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, texture->GetResourceState()); - if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET) + cmdlist, src_level, GSTexture12::ResourceState::PixelShaderResource, texture->GetResourceState()); + if (texture->GetResourceState() != GSTexture12::ResourceState::RenderTarget) texture->TransitionSubresourceToState( - cmdlist, dst_level, D3D12_RESOURCE_STATE_RENDER_TARGET, texture->GetResourceState()); + cmdlist, dst_level, GSTexture12::ResourceState::RenderTarget, texture->GetResourceState()); // Must destroy after current cmdlist. 
DeferDescriptorDestruction(m_descriptor_heap_manager, &srv_handle); @@ -3496,7 +3635,7 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b } } - GetCommandList()->BeginRenderPass(m_current_render_target ? 1 : 0, + GetCommandList().list4->BeginRenderPass(m_current_render_target ? 1 : 0, m_current_render_target ? &rt : nullptr, m_current_depth_target ? &ds : nullptr, (m_current_depth_target && m_current_depth_read_only) ? (D3D12_RENDER_PASS_FLAG_BIND_READ_ONLY_DEPTH) : D3D12_RENDER_PASS_FLAG_NONE); } @@ -3513,7 +3652,7 @@ void GSDevice12::EndRenderPass() g_perfmon.Put(GSPerfMon::RenderPasses, 1); - GetCommandList()->EndRenderPass(); + GetCommandList().list4->EndRenderPass(); } void GSDevice12::SetViewport(const D3D12_VIEWPORT& viewport) @@ -3675,7 +3814,7 @@ bool GSDevice12::ApplyTFXState(bool already_execed) flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2; } - ID3D12GraphicsCommandList* cmdlist = GetCommandList(); + ID3D12GraphicsCommandList* cmdlist = GetCommandList().list4.get(); if (m_current_root_signature != RootSignature::TFX) { @@ -3714,7 +3853,7 @@ bool GSDevice12::ApplyUtilityState(bool already_execed) u32 flags = m_dirty_flags; m_dirty_flags &= ~DIRTY_UTILITY_STATE; - ID3D12GraphicsCommandList* cmdlist = GetCommandList(); + ID3D12GraphicsCommandList* cmdlist = GetCommandList().list4.get(); if (m_current_root_signature != RootSignature::Utility) { @@ -3846,11 +3985,37 @@ GSTexture12* GSDevice12::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, Pipe config.alpha_second_pass.ps.date = 3; // and bind the image to the primitive sampler - image->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + image->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); PSSetShaderResource(3, image, false); return image; } +void GSDevice12::FeedbackBarrier(const GSTexture12* texture) +{ + // The DX12 spec notes "You may not read from, or consume, a write that occurred within the same render pass". 
+ // The only exception being the implicit reads for render target blending or depth testing. + // Thus, in addition to a barrier, we need to end the render pass. + EndRenderPass(); + if (m_enhanced_barriers) + { + // Enhanced barriers allows for single resource feedback. + const D3D12_BARRIER_SYNC sync = D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING; + const D3D12_BARRIER_ACCESS access = D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + const D3D12_TEXTURE_BARRIER barrier = {sync, sync, access, access, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_LAYOUT_COMMON, + texture->GetResource(), {D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, 0, 0, 0, 0, 0}, D3D12_TEXTURE_BARRIER_FLAG_NONE}; + const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_TEXTURE, .NumBarriers = 1, .pTextureBarriers = &barrier}; + GetCommandList().list7->Barrier(1, &group); + } + else + { + // Specify null for the after resource as both resources are used after the barrier. + // While this may also be true before the barrier, we only write using the main resource. + D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE}; + barrier.Aliasing = {texture->GetResource(), nullptr}; + GetCommandList().list4->ResourceBarrier(1, &barrier); + } +} + void GSDevice12::RenderHW(GSHWDrawConfig& config) { @@ -3871,12 +4036,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) if (m_features.texture_barrier && config.tex && (config.tex == config.rt) && !(config.require_one_barrier || config.require_full_barrier)) { g_perfmon.Put(GSPerfMon::Barriers, 1); - - EndRenderPass(); - // Specify null for the after resource as both resources are used after the barrier. 
- D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE}; - barrier.Aliasing = {draw_rt->GetResource(), nullptr}; - GetCommandList()->ResourceBarrier(1, &barrier); + FeedbackBarrier(draw_rt); } // now blit the colclip texture back to the original target @@ -3887,7 +4047,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) GL_PUSH("Blit ColorClip back to RT"); EndRenderPass(); - colclip_rt->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + colclip_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); draw_rt = static_cast(config.rt); OMSetRenderTargets(draw_rt, draw_ds, config.colclip_update_area); @@ -4001,7 +4161,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) else if (draw_rt->GetState() == GSTexture::State::Dirty) { GL_PUSH_("ColorClip Render Target Setup"); - draw_rt->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + draw_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); } // we're not drawing to the RT, so we can use it as a source @@ -4064,10 +4224,10 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) { EndRenderPass(); // Make sure the DSV is in writeable state - draw_ds->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE); + draw_ds->TransitionToState(GSTexture12::ResourceState::DepthWriteStencil); D3D12_RECT rect = {config.drawarea.left, config.drawarea.top, config.drawarea.left + config.drawarea.width(), config.drawarea.top + config.drawarea.height()}; - GetCommandList()->ClearDepthStencilView(draw_ds->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 0.0f, 1, 1, &rect); + GetCommandList().list4->ClearDepthStencilView(draw_ds->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 0.0f, 1, 1, &rect); } // Begin render pass if new target or out of the area. 
@@ -4167,7 +4327,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) GL_PUSH("Blit ColorClip back to RT"); EndRenderPass(); - colclip_rt->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + colclip_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource); draw_rt = static_cast(config.rt); OMSetRenderTargets(draw_rt, draw_ds, config.colclip_update_area); @@ -4223,12 +4383,7 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& { const u32 count = (*config.drawlist)[n] * indices_per_prim; - EndRenderPass(); - // Specify null for the after resource as both resources are used after the barrier. - // While this may also be true before the barrier, we only write using the main resource. - D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE}; - barrier.Aliasing = {draw_rt->GetResource(), nullptr}; - GetCommandList()->ResourceBarrier(1, &barrier); + FeedbackBarrier(draw_rt); if (BindDrawPipeline(pipe)) DrawIndexedPrimitive(p, count); @@ -4242,11 +4397,7 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& { g_perfmon.Put(GSPerfMon::Barriers, 1); - EndRenderPass(); - // Specify null for the after resource as both resources are used after the barrier. 
- D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE}; - barrier.Aliasing = {draw_rt->GetResource(), nullptr}; - GetCommandList()->ResourceBarrier(1, &barrier); + FeedbackBarrier(draw_rt); } } diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index 7244dc64b2..5eb2f1d410 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -21,6 +21,14 @@ namespace D3D12MA class Allocator; } +struct D3D12CommandList +{ + // Main command list + wil::com_ptr_nothrow list4; + // Enhanced barriers command list + wil::com_ptr_nothrow list7; +}; + class GSDevice12 final : public GSDevice { public: @@ -42,6 +50,12 @@ public: NUM_TIMESTAMP_QUERIES_PER_CMDLIST = 2, }; + union D3D12_RESOURCE_DESCU + { + D3D12_RESOURCE_DESC1 desc1; + D3D12_RESOURCE_DESC desc; + }; + __fi IDXGIAdapter1* GetAdapter() const { return m_adapter.get(); } __fi ID3D12Device* GetDevice() const { return m_device.get(); } __fi ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.get(); } @@ -50,14 +64,16 @@ public: /// Returns the PCI vendor ID of the device, if known. u32 GetAdapterVendorID() const; + bool UseEnhancedBarriers() const { return m_enhanced_barriers; } + /// Returns the current command list, commands can be recorded directly. - ID3D12GraphicsCommandList4* GetCommandList() const + const D3D12CommandList& GetCommandList() const { - return m_command_lists[m_current_command_list].command_lists[1].get(); + return m_command_lists[m_current_command_list].command_lists[1]; } /// Returns the init command list for uploading. - ID3D12GraphicsCommandList4* GetInitCommandList(); + const D3D12CommandList& GetInitCommandList(); /// Returns the per-frame SRV/CBV/UAV allocator. 
D3D12DescriptorAllocator& GetDescriptorAllocator() @@ -137,7 +153,7 @@ private: struct CommandListResources { std::array, 2> command_allocators; - std::array, 2> command_lists; + std::array command_lists; D3D12DescriptorAllocator descriptor_allocator; D3D12GroupedSamplerAllocator sampler_allocator; std::vector> pending_resources; @@ -147,6 +163,8 @@ private: bool has_timestamp_query = false; }; + void LoadAgilitySDK(); + bool CreateDevice(u32& vendor_id); bool CreateDescriptorHeaps(); bool CreateCommandLists(); @@ -296,6 +314,7 @@ private: bool m_allow_tearing_supported = false; bool m_using_allow_tearing = false; bool m_is_exclusive_fullscreen = false; + bool m_enhanced_barriers = true; bool m_device_lost = false; ComPtr m_tfx_root_signature; @@ -377,6 +396,8 @@ private: ComPtr GetUtilityVertexShader(const std::string& source, const char* entry_point); ComPtr GetUtilityPixelShader(const std::string& source, const char* entry_point); + void FeedbackBarrier(const GSTexture12* texture); + bool CheckFeatures(const u32& vendor_id); bool CreateNullTexture(); bool CreateBuffers(); diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp index 16aacdc9ee..bd96b85378 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp @@ -19,7 +19,7 @@ GSTexture12::GSTexture12(Type type, Format format, int width, int height, int le wil::com_ptr_nothrow allocation, const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& ro_dsv_descriptor, const D3D12DescriptorHandle& uav_descriptor, const D3D12DescriptorHandle& fbl_descriptor, - WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state) + WriteDescriptorType wdtype, bool simultaneous_texture, ResourceState resource_state) : m_resource(std::move(resource)) , m_resource_fbl(std::move(resource_fbl)) , m_allocation(std::move(allocation)) @@ -31,6 +31,7 @@ GSTexture12::GSTexture12(Type type, 
Format format, int width, int height, int le , m_write_descriptor_type(wdtype) , m_dxgi_format(dxgi_format) , m_resource_state(resource_state) + , m_simultaneous_tex(simultaneous_texture) { m_type = type; m_format = format; @@ -111,28 +112,87 @@ void GSTexture12::Destroy(bool defer) m_write_descriptor_type = WriteDescriptorType::None; } +// For use with non-simultaneous textures only. +// Simultaneous textures are always D3D12_BARRIER_LAYOUT_COMMON. +static D3D12_BARRIER_LAYOUT GetD3D12BarrierLayout(GSTexture12::ResourceState state) +{ + switch (state) + { + case GSTexture12::ResourceState::Undefined: + return D3D12_BARRIER_LAYOUT_UNDEFINED; + case GSTexture12::ResourceState::Present: + return D3D12_BARRIER_LAYOUT_COMMON; + case GSTexture12::ResourceState::RenderTarget: + return D3D12_BARRIER_LAYOUT_RENDER_TARGET; + case GSTexture12::ResourceState::DepthWriteStencil: + return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + case GSTexture12::ResourceState::PixelShaderResource: + case GSTexture12::ResourceState::ComputeShaderResource: + return D3D12_BARRIER_LAYOUT_SHADER_RESOURCE; + case GSTexture12::ResourceState::CopySrc: + return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE; + case GSTexture12::ResourceState::CopyDst: + return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST; + case GSTexture12::ResourceState::CASShaderUAV: + case GSTexture12::ResourceState::PixelShaderUAV: + return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS; + default: + pxAssert(false); + return D3D12_BARRIER_LAYOUT_UNDEFINED; + } +} + +static D3D12_RESOURCE_STATES GetD3D12ResourceState(GSTexture12::ResourceState state) +{ + switch (state) + { + case GSTexture12::ResourceState::Undefined: + return D3D12_RESOURCE_STATE_COMMON; + case GSTexture12::ResourceState::Present: + return D3D12_RESOURCE_STATE_COMMON; + case GSTexture12::ResourceState::RenderTarget: + return D3D12_RESOURCE_STATE_RENDER_TARGET; + case GSTexture12::ResourceState::DepthWriteStencil: + return D3D12_RESOURCE_STATE_DEPTH_WRITE; + 
case GSTexture12::ResourceState::PixelShaderResource: + return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + case GSTexture12::ResourceState::ComputeShaderResource: + return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + case GSTexture12::ResourceState::CopySrc: + return D3D12_RESOURCE_STATE_COPY_SOURCE; + case GSTexture12::ResourceState::CopyDst: + return D3D12_RESOURCE_STATE_COPY_DEST; + case GSTexture12::ResourceState::CASShaderUAV: + case GSTexture12::ResourceState::PixelShaderUAV: + return D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + default: + pxAssert(false); + return D3D12_RESOURCE_STATE_COMMON; + } +} + std::unique_ptr GSTexture12::Create(Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format) { GSDevice12* const dev = GSDevice12::GetInstance(); - D3D12_RESOURCE_DESC desc = {}; - desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - desc.Width = width; - desc.Height = height; - desc.DepthOrArraySize = 1; - desc.MipLevels = levels; - desc.Format = dxgi_format; - desc.SampleDesc.Count = 1; - desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + GSDevice12::D3D12_RESOURCE_DESCU desc = {}; + desc.desc1.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + desc.desc1.Width = width; + desc.desc1.Height = height; + desc.desc1.DepthOrArraySize = 1; + desc.desc1.MipLevels = levels; + desc.desc1.Format = dxgi_format; + desc.desc1.SampleDesc.Count = 1; + desc.desc1.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; D3D12MA::ALLOCATION_DESC allocationDesc = {}; allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_WITHIN_BUDGET; allocationDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT; D3D12_CLEAR_VALUE optimized_clear_value = {}; - D3D12_RESOURCE_STATES state; + ResourceState state; switch (type) { @@ -140,9 +200,9 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w { // This is a little annoying. 
basically, to do mipmap generation, we need to be a render target. // If it's a compressed texture, we won't be generating mips anyway, so this should be fine. - desc.Flags = (levels > 1 && !IsCompressedFormat(format)) ? D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET : - D3D12_RESOURCE_FLAG_NONE; - state = D3D12_RESOURCE_STATE_COPY_DEST; + desc.desc1.Flags = (levels > 1 && !IsCompressedFormat(format)) ? D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET : + D3D12_RESOURCE_FLAG_NONE; + state = ResourceState::CopyDst; } break; @@ -152,10 +212,11 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w pxAssert(levels == 1); allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; allocationDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; - desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; - desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + desc.desc1.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; + if (!dev->UseEnhancedBarriers()) + desc.desc1.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; optimized_clear_value.Format = rtv_format; - state = D3D12_RESOURCE_STATE_RENDER_TARGET; + state = ResourceState::RenderTarget; } break; @@ -163,9 +224,9 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w { pxAssert(levels == 1); allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; - desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + desc.desc1.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; optimized_clear_value.Format = dsv_format; - state = D3D12_RESOURCE_STATE_DEPTH_WRITE; + state = ResourceState::DepthWriteStencil; } break; @@ -173,7 +234,7 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w { pxAssert(levels == 1); allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; - state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + state = 
ResourceState::PixelShaderResource; } break; @@ -182,15 +243,16 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w } if (uav_format != DXGI_FORMAT_UNKNOWN) - desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + desc.desc1.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; wil::com_ptr_nothrow resource; wil::com_ptr_nothrow resource_fbl; wil::com_ptr_nothrow allocation; - if (type == Type::RenderTarget) + if (type == Type::RenderTarget && !dev->UseEnhancedBarriers()) { - const D3D12_RESOURCE_ALLOCATION_INFO allocInfo = dev->GetDevice()->GetResourceAllocationInfo(0, 1, &desc); + // We need to use an aliased resource for feedback with legacy barriers. + const D3D12_RESOURCE_ALLOCATION_INFO allocInfo = dev->GetDevice()->GetResourceAllocationInfo(0, 1, &desc.desc); HRESULT hr = dev->GetAllocator()->AllocateMemory(&allocationDesc, &allocInfo, allocation.put()); if (FAILED(hr)) @@ -202,7 +264,7 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w return {}; } - hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, state, + hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc.desc, GetD3D12ResourceState(state), (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, IID_PPV_ARGS(resource.put())); if (FAILED(hr)) @@ -214,7 +276,7 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w return {}; } - hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc.desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, (type == Type::RenderTarget || type == Type::DepthStencil) ? 
&optimized_clear_value : nullptr, IID_PPV_ARGS(resource_fbl.put())); if (FAILED(hr)) @@ -228,9 +290,20 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w } else { - HRESULT hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc, state, - (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(), - IID_PPV_ARGS(resource.put())); + HRESULT hr; + if (dev->UseEnhancedBarriers()) + { + hr = dev->GetAllocator()->CreateResource3(&allocationDesc, &desc.desc1, + type == Type::RenderTarget ? D3D12_BARRIER_LAYOUT_COMMON : GetD3D12BarrierLayout(state), + (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, + 0, nullptr, allocation.put(), IID_PPV_ARGS(resource.put())); + } + else + { + hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc.desc, GetD3D12ResourceState(state), + (type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(), + IID_PPV_ARGS(resource.put())); + } if (FAILED(hr)) { // OOM isn't fatal. 
@@ -301,8 +374,10 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w return {}; } + // Feedback descriptor used with legacy barriers if (resource_fbl) { + pxAssert(!dev->UseEnhancedBarriers()); if (!CreateSRVDescriptor(resource_fbl.get(), levels, srv_format, &fbl_descriptor)) { dev->GetDescriptorHeapManager().Free(&uav_descriptor); @@ -325,12 +400,12 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w return std::unique_ptr( new GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(resource_fbl), std::move(allocation), - srv_descriptor, write_descriptor, ro_dsv_descriptor, uav_descriptor, fbl_descriptor, write_descriptor_type, state)); + srv_descriptor, write_descriptor, ro_dsv_descriptor, uav_descriptor, fbl_descriptor, write_descriptor_type, type == Type::RenderTarget, state)); } std::unique_ptr GSTexture12::Adopt(wil::com_ptr_nothrow resource, Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, - DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, D3D12_RESOURCE_STATES resource_state) + DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, ResourceState resource_state) { const D3D12_RESOURCE_DESC desc = resource->GetDesc(); @@ -391,7 +466,7 @@ std::unique_ptr GSTexture12::Adopt(wil::com_ptr_nothrow(new GSTexture12(type, format, static_cast(desc.Width), desc.Height, desc.MipLevels, desc.Format, std::move(resource), {}, {}, srv_descriptor, write_descriptor, {}, uav_descriptor, - {}, write_descriptor_type, resource_state)); + {}, write_descriptor_type, false, resource_state)); } bool GSTexture12::CreateSRVDescriptor( @@ -432,7 +507,7 @@ bool GSTexture12::CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT form return false; } - const D3D12_DEPTH_STENCIL_VIEW_DESC desc = {format, D3D12_DSV_DIMENSION_TEXTURE2D, read_only ? 
D3D12_DSV_FLAG_READ_ONLY_DEPTH : D3D12_DSV_FLAG_NONE }; + const D3D12_DEPTH_STENCIL_VIEW_DESC desc = {format, D3D12_DSV_DIMENSION_TEXTURE2D, read_only ? D3D12_DSV_FLAG_READ_ONLY_DEPTH : D3D12_DSV_FLAG_NONE}; GSDevice12::GetInstance()->GetDevice()->CreateDepthStencilView(resource, &desc, dh->cpu_handle); return true; } @@ -455,7 +530,7 @@ void* GSTexture12::GetNativeHandle() const return const_cast(this); } -ID3D12GraphicsCommandList* GSTexture12::GetCommandBufferForUpdate() +const D3D12CommandList& GSTexture12::GetCommandBufferForUpdate() { GSDevice12* const dev = GSDevice12::GetInstance(); if (m_type != Type::Texture || m_use_fence_counter == dev->GetCurrentFenceValue()) @@ -476,10 +551,20 @@ ID3D12Resource* GSTexture12::AllocateUploadStagingBuffer( wil::com_ptr_nothrow allocation; const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD}; - const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1, - DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}; - HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(resource.put())); + + const GSDevice12::D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}}; + HRESULT hr; + if (GSDevice12::GetInstance()->UseEnhancedBarriers()) + { + hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource3(&allocation_desc, &resource_desc.desc1, + D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, allocation.put(), IID_PPV_ARGS(resource.put())); + } + else + { + hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc.desc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), 
IID_PPV_ARGS(resource.put())); + } if (FAILED(hr)) { Console.WriteLn("(AllocateUploadStagingBuffer) CreateCommittedResource() failed with %08X", hr); @@ -564,14 +649,14 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l sbuffer.CommitMemory(required_size); } - ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate(); + const D3D12CommandList& cmdlist = GetCommandBufferForUpdate(); GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer); // first time the texture is used? don't leave it undefined - if (m_resource_state == D3D12_RESOURCE_STATE_COMMON) - TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST); - else if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) - TransitionSubresourceToState(cmdlist, layer, m_resource_state, D3D12_RESOURCE_STATE_COPY_DEST); + if (m_resource_state == GSTexture12::ResourceState::Undefined) + TransitionToState(cmdlist, GSTexture12::ResourceState::CopyDst); + else if (m_resource_state != GSTexture12::ResourceState::CopyDst) + TransitionSubresourceToState(cmdlist, layer, m_resource_state, GSTexture12::ResourceState::CopyDst); // if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear if (m_type == Type::RenderTarget) @@ -588,11 +673,11 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l dstloc.SubresourceIndex = layer; const D3D12_BOX srcbox{0u, 0u, 0u, width, height, 1u}; - cmdlist->CopyTextureRegion(&dstloc, Common::AlignDownPow2((u32)r.x, block_size), + cmdlist.list4->CopyTextureRegion(&dstloc, Common::AlignDownPow2((u32)r.x, block_size), Common::AlignDownPow2((u32)r.y, block_size), 0, &srcloc, &srcbox); - if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) - TransitionSubresourceToState(cmdlist, layer, D3D12_RESOURCE_STATE_COPY_DEST, m_resource_state); + if (m_resource_state != GSTexture12::ResourceState::CopyDst) + TransitionSubresourceToState(cmdlist, layer, 
GSTexture12::ResourceState::CopyDst, m_resource_state); if (m_type == Type::Texture) m_needs_mipmaps_generated |= (layer == 0); @@ -642,15 +727,15 @@ void GSTexture12::Unmap() const u32 buffer_offset = buffer.GetCurrentOffset(); buffer.CommitMemory(required_size); - ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate(); + const D3D12CommandList& cmdlist = GetCommandBufferForUpdate(); GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", m_map_area.x, m_map_area.y, m_map_area.width(), m_map_area.height(), m_map_level); // first time the texture is used? don't leave it undefined - if (m_resource_state == D3D12_RESOURCE_STATE_COMMON) - TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST); - else if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) - TransitionSubresourceToState(cmdlist, m_map_level, m_resource_state, D3D12_RESOURCE_STATE_COPY_DEST); + if (m_resource_state == ResourceState::Undefined) + TransitionToState(cmdlist, ResourceState::CopyDst); + else if (m_resource_state != ResourceState::CopyDst) + TransitionSubresourceToState(cmdlist, m_map_level, m_resource_state, ResourceState::CopyDst); // if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear if (m_type == Type::RenderTarget) @@ -677,10 +762,10 @@ void GSTexture12::Unmap() dstloc.SubresourceIndex = m_map_level; const D3D12_BOX srcbox{0u, 0u, 0u, width, height, 1}; - cmdlist->CopyTextureRegion(&dstloc, m_map_area.x, m_map_area.y, 0, &srcloc, &srcbox); + cmdlist.list4->CopyTextureRegion(&dstloc, m_map_area.x, m_map_area.y, 0, &srcloc, &srcbox); - if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) - TransitionSubresourceToState(cmdlist, m_map_level, D3D12_RESOURCE_STATE_COPY_DEST, m_resource_state); + if (m_resource_state != ResourceState::CopyDst) + TransitionSubresourceToState(cmdlist, m_map_level, ResourceState::CopyDst, m_resource_state); if (m_type == Type::Texture) m_needs_mipmaps_generated |= (m_map_level == 0); @@ -717,55 +802,343 @@ void 
GSTexture12::SetDebugName(std::string_view name) #endif -void GSTexture12::TransitionToState(D3D12_RESOURCE_STATES state) +void GSTexture12::TransitionToState(ResourceState state) { TransitionToState(GSDevice12::GetInstance()->GetCommandList(), state); } -void GSTexture12::TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state) +void GSTexture12::TransitionToState(const D3D12CommandList& cmdlist, ResourceState state) { if (m_resource_state == state) return; - // Read only depth requires special handling as we might want to write stencil. - // Also batch the transition barriers as per recommendation from docs. - if (state == (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)) - { - // Transition to read depth/write stencil - const D3D12_RESOURCE_BARRIER barriers[2] = { - {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE, - {{m_resource.get(), 0, m_resource_state, (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)}}}, - {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE, - {{m_resource.get(), 1, m_resource_state, D3D12_RESOURCE_STATE_DEPTH_WRITE}}}, - }; - cmdlist->ResourceBarrier(m_resource_state == D3D12_RESOURCE_STATE_DEPTH_WRITE ? 1 : 2, barriers); - } - else if (m_resource_state == (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)) - { - // Transition from read depth/write stencil - const D3D12_RESOURCE_BARRIER barriers[2] = { - {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE, - {{m_resource.get(), 0, (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), state}}}, - {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE, - {{m_resource.get(), 1, D3D12_RESOURCE_STATE_DEPTH_WRITE, state}}}, - }; - cmdlist->ResourceBarrier(state == D3D12_RESOURCE_STATE_DEPTH_WRITE ? 
1 : 2, barriers); - } - else - { - // Normal transition - TransitionSubresourceToState(cmdlist, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, m_resource_state, state); - } + TransitionSubresourceToState(cmdlist, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, m_resource_state, state); m_resource_state = state; } -void GSTexture12::TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, int level, - D3D12_RESOURCE_STATES before_state, D3D12_RESOURCE_STATES after_state) const +void GSTexture12::TransitionSubresourceToState(const D3D12CommandList& cmdlist, int level, + ResourceState before_state, ResourceState after_state) const { - const D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE, - {{m_resource.get(), static_cast(level), before_state, after_state}}}; - cmdlist->ResourceBarrier(1, &barrier); + if (GSDevice12::GetInstance()->UseEnhancedBarriers()) + { + // Read only depth requires special handling as we might want to write stencil. + // We need to transition subresources separately, requiring 2 barriers + // Handling it here allows us to batch those barriers. + // Other transitions only need the one barrier. + D3D12_TEXTURE_BARRIER barriers[2] = {{D3D12_BARRIER_SYNC_NONE, D3D12_BARRIER_SYNC_NONE, + D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON, + D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_LAYOUT_COMMON, + m_resource.get(), {static_cast(level), 0, 0, 0, 0, 0}, D3D12_TEXTURE_BARRIER_FLAG_NONE}}; + + uint num_barriers = 1; + D3D12_TEXTURE_BARRIER& barrier = barriers[0]; + switch (before_state) + { + case ResourceState::Undefined: + case ResourceState::Present: + barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_COMMON; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS; + barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE; + break; + case ResourceState::RenderTarget: + barrier.LayoutBefore = m_simultaneous_tex ? 
D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_RENDER_TARGET; + barrier.AccessBefore = m_simultaneous_tex ? + D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE : + D3D12_BARRIER_ACCESS_RENDER_TARGET; + barrier.SyncBefore = m_simultaneous_tex ? + D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING : + D3D12_BARRIER_SYNC_RENDER_TARGET; + break; + case ResourceState::DepthWriteStencil: + pxAssert(!m_simultaneous_tex); + barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; + barrier.SyncBefore = D3D12_BARRIER_SYNC_DEPTH_STENCIL; + break; + case ResourceState::DepthReadStencil: + pxAssert(!m_simultaneous_tex); + pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + + barriers[0].Subresources = {0, static_cast(m_mipmap_levels), 0, 1, 0, 1}; + barriers[0].LayoutBefore = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ; + barriers[0].AccessBefore = D3D12_BARRIER_ACCESS_SHADER_RESOURCE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + barriers[0].SyncBefore = D3D12_BARRIER_SYNC_DEPTH_STENCIL | D3D12_BARRIER_SYNC_PIXEL_SHADING; + if (after_state != ResourceState::DepthWriteStencil) + { + num_barriers = 2; + barriers[1].Subresources = {0, static_cast(m_mipmap_levels), 0, 1, 1, 1}; + barriers[1].LayoutBefore = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + barriers[1].AccessBefore = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + barriers[1].SyncBefore = D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } + break; + case ResourceState::PixelShaderResource: + barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + barrier.SyncBefore = D3D12_BARRIER_SYNC_PIXEL_SHADING; + break; + case ResourceState::ComputeShaderResource: + barrier.LayoutBefore = m_simultaneous_tex ? 
D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + barrier.SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING; + break; + case ResourceState::CopySrc: + barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_COPY_SOURCE; + barrier.SyncBefore = D3D12_BARRIER_SYNC_COPY; + break; + case ResourceState::CopyDst: + barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_COPY_DEST; + barrier.SyncBefore = D3D12_BARRIER_SYNC_COPY; + break; + case ResourceState::CASShaderUAV: + barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + barrier.SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING; + break; + case ResourceState::PixelShaderUAV: + barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + barrier.SyncBefore = D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; + break; + default: + pxAssert(false); + barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED; + barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS; + barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE; + break; + } + + switch (after_state) + { + case ResourceState::Undefined: + case ResourceState::Present: + barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_COMMON; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS; + barrier.SyncAfter = D3D12_BARRIER_SYNC_NONE; + break; + case ResourceState::RenderTarget: + barrier.LayoutAfter = m_simultaneous_tex ? 
D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_RENDER_TARGET; + barrier.AccessAfter = m_simultaneous_tex ? + D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE : + D3D12_BARRIER_ACCESS_RENDER_TARGET; + barrier.SyncAfter = m_simultaneous_tex ? + D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING : + D3D12_BARRIER_SYNC_RENDER_TARGET; + break; + case ResourceState::DepthWriteStencil: + pxAssert(!m_simultaneous_tex); + barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; + barrier.SyncAfter = D3D12_BARRIER_SYNC_DEPTH_STENCIL; + break; + case ResourceState::DepthReadStencil: + pxAssert(!m_simultaneous_tex); + pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + + barriers[0].Subresources = {0, static_cast(m_mipmap_levels), 0, 1, 0, 1}; + barriers[0].LayoutAfter = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ; + barriers[0].AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + barriers[0].SyncAfter = D3D12_BARRIER_SYNC_DEPTH_STENCIL | D3D12_BARRIER_SYNC_PIXEL_SHADING; + if (before_state != ResourceState::DepthWriteStencil) + { + num_barriers = 2; + barriers[1].Subresources = {0, static_cast(m_mipmap_levels), 0, 1, 1, 1}; + barriers[1].LayoutAfter = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; + barriers[1].AccessAfter = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; + barriers[1].SyncAfter = D3D12_BARRIER_SYNC_DEPTH_STENCIL; + } + break; + case ResourceState::PixelShaderResource: + barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + barrier.SyncAfter = D3D12_BARRIER_SYNC_PIXEL_SHADING; + break; + case ResourceState::ComputeShaderResource: + barrier.LayoutAfter = m_simultaneous_tex ? 
D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE; + barrier.SyncAfter = D3D12_BARRIER_SYNC_COMPUTE_SHADING; + break; + case ResourceState::CopySrc: + barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_COPY_SOURCE; + barrier.SyncAfter = D3D12_BARRIER_SYNC_COPY; + break; + case ResourceState::CopyDst: + barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_COPY_DEST; + barrier.SyncAfter = D3D12_BARRIER_SYNC_COPY; + break; + case ResourceState::CASShaderUAV: + barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + barrier.SyncAfter = D3D12_BARRIER_SYNC_COMPUTE_SHADING; + break; + case ResourceState::PixelShaderUAV: + barrier.LayoutAfter = m_simultaneous_tex ? 
D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; + barrier.SyncAfter = D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW; + break; + default: + pxAssert(false); + barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_UNDEFINED; + barrier.AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS; + barrier.SyncAfter = D3D12_BARRIER_SYNC_NONE; + break; + } + + if (num_barriers == 2) + { + barriers[1].pResource = m_resource.get(); + barriers[1].Flags = barriers[0].Flags; + if (before_state == ResourceState::DepthReadStencil) + { + barriers[1].LayoutAfter = barriers[0].LayoutAfter; + barriers[1].AccessAfter = barriers[0].AccessAfter; + barriers[1].SyncAfter = barriers[0].SyncAfter; + } + else // after_state == ResourceState::DepthReadStencil + { + barriers[1].LayoutBefore = barriers[0].LayoutBefore; + barriers[1].AccessBefore = barriers[0].AccessBefore; + barriers[1].SyncBefore = barriers[0].SyncBefore; + } + } + + const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_TEXTURE, .NumBarriers = num_barriers, .pTextureBarriers = barriers}; + cmdlist.list7->Barrier(1, &group); + } + else + { + // Read only depth requires special handling as we might want to write stencil. + // We need to transition subresources separately, requiring 2 barriers. + // Handling it here allows us to batch those barriers. + // Other transitions only need the one barrier. 
+ D3D12_RESOURCE_BARRIER barriers[2] = {{D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE, + {{m_resource.get(), static_cast(level), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COMMON}}}}; + + int num_barriers = 1; + D3D12_RESOURCE_BARRIER& barrier = barriers[0]; + switch (before_state) + { + case ResourceState::Undefined: + case ResourceState::Present: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + break; + case ResourceState::RenderTarget: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + break; + case ResourceState::DepthWriteStencil: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; + break; + case ResourceState::DepthReadStencil: + pxAssert(!m_simultaneous_tex); + pxAssert(m_mipmap_levels == 1); + pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + + barriers[0].Transition.Subresource = 0; + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_DEPTH_READ; + if (after_state != ResourceState::DepthWriteStencil) + { + num_barriers = 2; + barriers[1].Transition.Subresource = 1; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + break; + case ResourceState::PixelShaderResource: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + break; + case ResourceState::ComputeShaderResource: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + break; + case ResourceState::CopySrc: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + break; + case ResourceState::CopyDst: + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + break; + case ResourceState::CASShaderUAV: + case ResourceState::PixelShaderUAV: + // Handled in after_state cases. 
+ if (after_state == ResourceState::CASShaderUAV || after_state == ResourceState::PixelShaderUAV) + break; + + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + break; + default: + pxAssert(false); + break; + } + + switch (after_state) + { + case ResourceState::Undefined: + case ResourceState::Present: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + break; + case ResourceState::RenderTarget: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + break; + case ResourceState::DepthWriteStencil: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; + break; + case ResourceState::DepthReadStencil: + pxAssert(!m_simultaneous_tex); + pxAssert(m_mipmap_levels == 1); + pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + + barriers[0].Transition.Subresource = 0; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_DEPTH_READ; + if (before_state != ResourceState::DepthWriteStencil) + { + num_barriers = 2; + barriers[1].Transition.Subresource = 1; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + break; + case ResourceState::PixelShaderResource: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + break; + case ResourceState::ComputeShaderResource: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + break; + case ResourceState::CopySrc: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + break; + case ResourceState::CopyDst: + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + break; + case ResourceState::CASShaderUAV: + case ResourceState::PixelShaderUAV: + if (before_state == ResourceState::CASShaderUAV || before_state == ResourceState::PixelShaderUAV) + { + // No state transition, but probably want a barrier instead. 
+ barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + // pResource is a common initial member, so no need to set again. + } + else + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + break; + default: + pxAssert(false); + break; + } + + if (num_barriers == 2) + { + barriers[1].Transition.pResource = m_resource.get(); + barriers[1].Type = barriers[0].Type; + barriers[1].Flags = barriers[0].Flags; + if (before_state == ResourceState::DepthReadStencil) + barriers[1].Transition.StateAfter = barriers[0].Transition.StateAfter; + else // after_state == ResourceState::DepthReadStencil + barriers[1].Transition.StateBefore = barriers[0].Transition.StateBefore; + } + + cmdlist.list4->ResourceBarrier(num_barriers, barriers); + } } void GSTexture12::CommitClear() @@ -774,22 +1147,21 @@ void GSTexture12::CommitClear() return; GSDevice12::GetInstance()->EndRenderPass(); - CommitClear(GSDevice12::GetInstance()->GetCommandList()); } -void GSTexture12::CommitClear(ID3D12GraphicsCommandList* cmdlist) +void GSTexture12::CommitClear(const D3D12CommandList& cmdlist) { if (IsDepthStencil()) { - TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE); - cmdlist->ClearDepthStencilView( + TransitionToState(cmdlist, ResourceState::DepthWriteStencil); + cmdlist.list4->ClearDepthStencilView( GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, m_clear_value.depth, 0, 0, nullptr); } else { - TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(GetWriteDescriptor(), GSVector4::unorm8(m_clear_value.color).v, 0, nullptr); + TransitionToState(cmdlist, ResourceState::RenderTarget); + cmdlist.list4->ClearRenderTargetView(GetWriteDescriptor(), GSVector4::unorm8(m_clear_value.color).v, 0, nullptr); } SetState(GSTexture::State::Dirty); @@ -816,14 +1188,23 @@ std::unique_ptr GSDownloadTexture12::Create(u32 width, u32 D3D12MA::ALLOCATION_DESC allocation_desc = {}; allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK; - const D3D12_RESOURCE_DESC 
resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1, - DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}; + const GSDevice12::D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}}; wil::com_ptr_nothrow allocation; wil::com_ptr_nothrow buffer; - HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc, - D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put())); + HRESULT hr; + if (GSDevice12::GetInstance()->UseEnhancedBarriers()) + { + hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource3(&allocation_desc, &resource_desc.desc1, + D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put())); + } + else + { + hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc.desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put())); + } if (FAILED(hr)) { Console.Error("(GSDownloadTexture12::Create) CreateResource() failed with HRESULT %08X", hr); @@ -861,7 +1242,7 @@ void GSDownloadTexture12::CopyFromTexture( if (IsMapped()) Unmap(); - ID3D12GraphicsCommandList* cmdlist = GSDevice12::GetInstance()->GetCommandList(); + const D3D12CommandList& cmdlist = GSDevice12::GetInstance()->GetCommandList(); GL_INS("ReadbackTexture: {%d,%d} %ux%u", src.left, src.top, src.width(), src.height()); D3D12_TEXTURE_COPY_LOCATION srcloc; @@ -879,17 +1260,17 @@ void GSDownloadTexture12::CopyFromTexture( dstloc.PlacedFootprint.Footprint.Depth = 1; dstloc.PlacedFootprint.Footprint.RowPitch = m_current_pitch; - const D3D12_RESOURCE_STATES old_layout = tex12->GetResourceState(); - if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) - tex12->TransitionSubresourceToState(cmdlist, src_level, old_layout, 
D3D12_RESOURCE_STATE_COPY_SOURCE); + const GSTexture12::ResourceState old_layout = tex12->GetResourceState(); + if (old_layout != GSTexture12::ResourceState::CopySrc) + tex12->TransitionSubresourceToState(cmdlist, src_level, old_layout, GSTexture12::ResourceState::CopySrc); // TODO: Rules for depth buffers here? const D3D12_BOX srcbox{static_cast(src.left), static_cast(src.top), 0u, static_cast(src.right), static_cast(src.bottom), 1u}; - cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox); + cmdlist.list4->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox); - if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) - tex12->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout); + if (old_layout != GSTexture12::ResourceState::CopySrc) + tex12->TransitionSubresourceToState(cmdlist, src_level, GSTexture12::ResourceState::CopySrc, old_layout); m_copy_fence_value = GSDevice12::GetInstance()->GetCurrentFenceValue(); m_needs_flush = true; diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.h b/pcsx2/GS/Renderers/DX12/GSTexture12.h index df92af3f91..7ed880f81d 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.h +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.h @@ -16,9 +16,27 @@ namespace D3D12MA class Allocation; } +struct D3D12CommandList; + class GSTexture12 final : public GSTexture { public: + enum class ResourceState : u32 + { + Undefined, + Present, + RenderTarget, + DepthWriteStencil, + DepthReadStencil, + PixelShaderResource, + ComputeShaderResource, + CopySrc, + CopyDst, + CASShaderUAV, // No Clear UAV Sync + PixelShaderUAV, + Count + }; + ~GSTexture12() override; static std::unique_ptr Create(Type type, Format format, int width, int height, int levels, @@ -26,14 +44,14 @@ public: DXGI_FORMAT uav_format); static std::unique_ptr Adopt(wil::com_ptr_nothrow resource, Type type, Format format, int width, int height, int levels, DXGI_FORMAT dxgi_format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, - DXGI_FORMAT dsv_format, 
DXGI_FORMAT uav_format, D3D12_RESOURCE_STATES resource_state); + DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, ResourceState resource_state); __fi const D3D12DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } __fi const D3D12DescriptorHandle& GetWriteDescriptor() const { return m_write_descriptor; } __fi const D3D12DescriptorHandle& GetReadDepthViewDescriptor() const { return m_read_dsv_descriptor; } __fi const D3D12DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; } __fi const D3D12DescriptorHandle& GetFBLDescriptor() const { return m_fbl_descriptor; } - __fi D3D12_RESOURCE_STATES GetResourceState() const { return m_resource_state; } + __fi ResourceState GetResourceState() const { return m_resource_state; } __fi DXGI_FORMAT GetDXGIFormat() const { return m_dxgi_format; } __fi ID3D12Resource* GetResource() const { return m_resource.get(); } __fi ID3D12Resource* GetFBLResource() const { return m_resource_fbl.get(); } @@ -49,15 +67,15 @@ public: void SetDebugName(std::string_view name) override; #endif - void TransitionToState(D3D12_RESOURCE_STATES state); + void TransitionToState(ResourceState state); void CommitClear(); - void CommitClear(ID3D12GraphicsCommandList* cmdlist); + void CommitClear(const D3D12CommandList& cmdlist); void Destroy(bool defer = true); - void TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state); - void TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, int level, D3D12_RESOURCE_STATES before_state, - D3D12_RESOURCE_STATES after_state) const; + void TransitionToState(const D3D12CommandList&, ResourceState state); + void TransitionSubresourceToState(const D3D12CommandList& cmdlist, int level, ResourceState before_state, + ResourceState after_state) const; // Call when the texture is bound to the pipeline, or read from in a copy. 
__fi void SetUseFenceCounter(u64 val) { m_use_fence_counter = val; } @@ -75,7 +93,7 @@ private: wil::com_ptr_nothrow allocation, const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& ro_dsv_descriptor, const D3D12DescriptorHandle& uav_descriptor, const D3D12DescriptorHandle& fbl_descriptor, - WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state); + WriteDescriptorType wdtype, bool simultaneous_texture, ResourceState resource_state); static bool CreateSRVDescriptor( ID3D12Resource* resource, u32 levels, DXGI_FORMAT format, D3D12DescriptorHandle* dh); @@ -83,7 +101,7 @@ private: static bool CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh, bool read_only); static bool CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh); - ID3D12GraphicsCommandList* GetCommandBufferForUpdate(); + const D3D12CommandList& GetCommandBufferForUpdate(); ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const; void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const; @@ -99,7 +117,11 @@ private: WriteDescriptorType m_write_descriptor_type = WriteDescriptorType::None; DXGI_FORMAT m_dxgi_format = DXGI_FORMAT_UNKNOWN; - D3D12_RESOURCE_STATES m_resource_state = D3D12_RESOURCE_STATE_COMMON; + ResourceState m_resource_state = ResourceState::Undefined; + + // With legacy barriers, an aliased resource is used as the feedback shader resource. + // With enhanced barriers, the layout is always COMMON, but can use the main resource for feedback. + bool m_simultaneous_tex; // Contains the fence counter when the texture was last used. // When this matches the current fence counter, the texture was used this command buffer.