mirror of
https://github.com/PCSX2/pcsx2.git
synced 2026-01-31 01:15:24 +01:00
4465 lines
154 KiB
C++
4465 lines
154 KiB
C++
// SPDX-FileCopyrightText: 2002-2026 PCSX2 Dev Team
|
|
// SPDX-License-Identifier: GPL-3.0+
|
|
|
|
#include "GS/GS.h"
|
|
#include "GS/GSGL.h"
|
|
#include "GS/GSPerfMon.h"
|
|
#include "GS/GSUtil.h"
|
|
#include "GS/Renderers/DX11/D3D.h"
|
|
#include "GS/Renderers/DX12/GSDevice12.h"
|
|
#include "GS/Renderers/DX12/D3D12Builders.h"
|
|
#include "GS/Renderers/DX12/D3D12ShaderCache.h"
|
|
#include "Host.h"
|
|
#include "ShaderCacheVersion.h"
|
|
|
|
#include "common/Console.h"
|
|
#include "common/BitUtils.h"
|
|
#include "common/Error.h"
|
|
#include "common/HostSys.h"
|
|
#include "common/ScopedGuard.h"
|
|
#include "common/SmallString.h"
|
|
#include "common/StringUtil.h"
|
|
#include "common/FileSystem.h"
|
|
#include "common/Path.h"
|
|
|
|
#include "D3D12MemAlloc.h"
|
|
#include "imgui.h"
|
|
|
|
#include <sstream>
|
|
#include <limits>
|
|
|
|
#ifdef ENABLE_OGL_DEBUG
|
|
#define USE_PIX
|
|
#include "WinPixEventRuntime/pix3.h"
|
|
|
|
static u32 s_debug_scope_depth = 0;
|
|
#endif
|
|
|
|
// Returns true when the given convert shader is one of the four DATM variants
// (DATM_0 / DATM_1, each with or without RTA correction).
static bool IsDATMConvertShader(ShaderConvert i)
{
	switch (i)
	{
		case ShaderConvert::DATM_0:
		case ShaderConvert::DATM_1:
		case ShaderConvert::DATM_0_RTA_CORRECTION:
		case ShaderConvert::DATM_1_RTA_CORRECTION:
			return true;

		default:
			return false;
	}
}
|
|
// Returns true when the flag has the value 1 or 2; every other value yields false.
static bool IsDATEModePrimIDInit(u32 flag)
{
	switch (flag)
	{
		case 1:
		case 2:
			return true;

		default:
			return false;
	}
}
|
|
|
|
// Fixed table of the three list topologies, in the order: point list, line list, triangle list.
// NOTE(review): presumably indexed by the GS primitive class - confirm at the use sites.
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, 3> s_primitive_topology_mapping = {
|
|
{D3D_PRIMITIVE_TOPOLOGY_POINTLIST, D3D_PRIMITIVE_TOPOLOGY_LINELIST, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST}};
|
|
|
|
// Four floats, value-initialized (all components are zero).
// NOTE(review): by its name this is the clear colour used when presenting - confirm at the use sites.
static constexpr std::array<float, 4> s_present_clear_color = {};
|
|
|
|
// Picks the render pass beginning access type for a texture from its tracked state.
//  - null texture      -> NO_ACCESS
//  - Cleared           -> CLEAR   (texture state is switched to Dirty)
//  - Invalidated       -> DISCARD (texture state is switched to Dirty)
//  - Dirty / any other -> PRESERVE
static D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE GetLoadOpForTexture(GSTexture12* tex)
{
	if (tex == nullptr)
		return D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS;

	const auto current_state = tex->GetState();

	if (current_state == GSTexture12::State::Cleared)
	{
		tex->SetState(GSTexture::State::Dirty);
		return D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
	}

	if (current_state == GSTexture12::State::Invalidated)
	{
		tex->SetState(GSTexture::State::Dirty);
		return D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
	}

	// Dirty, and anything else, keeps the existing contents.
	return D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
}
|
|
|
|
// Every macro list starts out with the define DX12=1.
GSDevice12::ShaderMacro::ShaderMacro()
{
	mlist.emplace_back("DX12", "1");
}
|
|
|
|
void GSDevice12::ShaderMacro::AddMacro(const char* n, int d)
|
|
{
|
|
AddMacro(n, std::to_string(d));
|
|
}
|
|
|
|
void GSDevice12::ShaderMacro::AddMacro(const char* n, std::string d)
|
|
{
|
|
mlist.emplace_back(n, std::move(d));
|
|
}
|
|
|
|
// Rebuilds the output array from the macro list and returns a pointer to it.
// Each output entry points at the c_str() of the strings stored in mlist, and the array is
// closed with a (nullptr, nullptr) entry. The output array is rebuilt from scratch on every call.
D3D_SHADER_MACRO* GSDevice12::ShaderMacro::GetPtr(void)
{
	mout.clear();

	for (auto it = mlist.begin(); it != mlist.end(); ++it)
		mout.emplace_back(it->name.c_str(), it->def.c_str());

	// Terminating entry.
	mout.emplace_back(nullptr, nullptr);

	return (D3D_SHADER_MACRO*)mout.data();
}
|
|
|
|
// Compiler-generated constructor; no explicit set-up is done here.
GSDevice12::GSDevice12() = default;
|
|
|
|
// Compiler-generated destructor; no explicit teardown is done here.
GSDevice12::~GSDevice12() = default;
|
|
|
|
// Serializes a version 1 root signature description into a blob.
// On failure the HRESULT (and the error blob text, when one was produced) is logged
// and an empty pointer is returned.
GSDevice12::ComPtr<ID3DBlob> GSDevice12::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc)
{
	ComPtr<ID3DBlob> serialized;
	ComPtr<ID3DBlob> errors;

	const HRESULT hr =
		D3D12SerializeRootSignature(desc, D3D_ROOT_SIGNATURE_VERSION_1, serialized.put(), errors.put());
	if (SUCCEEDED(hr))
		return serialized;

	Console.Error("D3D12SerializeRootSignature() failed: %08X", hr);
	if (errors)
		Console.Error("%s", errors->GetBufferPointer());

	return {};
}
|
|
|
|
// Serializes the description with SerializeRootSignature() and creates a root signature
// object from the resulting blob. Returns an empty pointer if either step fails
// (a creation failure is logged here).
GSDevice12::ComPtr<ID3D12RootSignature> GSDevice12::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc)
{
	ComPtr<ID3DBlob> serialized = SerializeRootSignature(desc);
	if (!serialized)
		return {};

	ComPtr<ID3D12RootSignature> root_signature;
	const HRESULT hr = m_device->CreateRootSignature(
		0, serialized->GetBufferPointer(), serialized->GetBufferSize(), IID_PPV_ARGS(root_signature.put()));
	if (SUCCEEDED(hr))
		return root_signature;

	Console.Error("D3D12: CreateRootSignature() failed: %08X", hr);
	return {};
}
|
|
|
|
// Returns true when the device reports both TEXTURE2D and SHADER_SAMPLE support for the format.
// A failed feature query counts as "not supported".
bool GSDevice12::SupportsTextureFormat(DXGI_FORMAT format)
{
	constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE;

	D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {format};
	if (FAILED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))))
		return false;

	// All required bits must be present.
	return (support.Support1 & required) == required;
}
|
|
|
|
bool GSDevice12::SupportsProgrammableSamplePositions()
|
|
{
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS2 options = {};
|
|
if (SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options, sizeof(options))))
|
|
return options.ProgrammableSamplePositionsTier != D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED;
|
|
|
|
return false;
|
|
}
|
|
|
|
// Returns the VendorId from the current adapter's description,
// or 0 when there is no adapter or its description cannot be read.
u32 GSDevice12::GetAdapterVendorID() const
{
	DXGI_ADAPTER_DESC desc;
	if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc)))
		return desc.VendorId;

	return 0;
}
|
|
|
|
uint SDKVersion(const std::string& path)
|
|
{
|
|
// The Agility SDK version is embeded as the minor file version.
|
|
// This is only true for the redist files, not the OS files.
|
|
// Alternativly, D3D12Core also exports its D3D12SDKVersion.
|
|
std::wstring wpath = FileSystem::GetWin32Path(path);
|
|
|
|
const DWORD size = GetFileVersionInfoSizeW(wpath.c_str(), nullptr);
|
|
if (size == 0)
|
|
return 0;
|
|
|
|
BOOL ret;
|
|
|
|
std::vector<char> info_data(size);
|
|
ret = GetFileVersionInfoW(wpath.c_str(), 0, size, info_data.data());
|
|
if (!ret)
|
|
return 0;
|
|
|
|
uint len;
|
|
VS_FIXEDFILEINFO* file_ver;
|
|
ret = VerQueryValueW(info_data.data(), L"\\", reinterpret_cast<void**>(&file_ver), &len);
|
|
if (!ret)
|
|
return 0;
|
|
|
|
return file_ver->dwFileVersionMS & 0xFFFF;
|
|
}
|
|
|
|
void GSDevice12::LoadAgilitySDK()
|
|
{
|
|
static bool agility_loaded = false;
|
|
if (agility_loaded)
|
|
return;
|
|
|
|
// On older versions of Windows 10 (example 2019 LTSC) D3D12GetInterface may fail because it doesn't exist,
|
|
// in such case we can check if D3D12GetInterface exists first.
|
|
const HMODULE d3d12 = GetModuleHandleW(L"d3d12.dll");
|
|
if (!d3d12)
|
|
return;
|
|
|
|
using PFN_D3D12GetInterface = HRESULT(WINAPI*)(REFCLSID rclsid, REFIID riid, void** ppv);
|
|
auto pD3D12GetInterface = reinterpret_cast<PFN_D3D12GetInterface>(GetProcAddress(d3d12, "D3D12GetInterface"));
|
|
if (!pD3D12GetInterface)
|
|
{
|
|
Console.Error("D3D12: Agility SDK configuration is not available");
|
|
return;
|
|
}
|
|
|
|
// See https://microsoft.github.io/DirectX-Specs/d3d/IndependentDevices.html
|
|
ComPtr<ID3D12SDKConfiguration1> sdk_configuration;
|
|
HRESULT hr;
|
|
hr = pD3D12GetInterface(CLSID_D3D12SDKConfiguration, IID_PPV_ARGS(sdk_configuration.put()));
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D12: Agility SDK configuration is not available");
|
|
return;
|
|
}
|
|
|
|
std::string sdk_path = Path::Combine(Path::GetDirectory(FileSystem::GetProgramPath()), "\\D3D12\\");
|
|
std::string core_path = Path::Combine(sdk_path, "D3D12Core.dll");
|
|
if (!FileSystem::FileExists(core_path.c_str()))
|
|
return;
|
|
|
|
const uint agility_version = SDKVersion(core_path);
|
|
if (agility_version == 0)
|
|
return;
|
|
|
|
ComPtr<ID3D12DeviceFactory> device_factory;
|
|
// CreateDeviceFactory seems to use a utf8 string for the path.
|
|
// If the system has a newer SDK, then the system SDK seems to be returned instead.
|
|
hr = sdk_configuration->CreateDeviceFactory(agility_version,
|
|
StringUtil::WideStringToUTF8String(FileSystem::GetWin32Path(sdk_path)).c_str(), IID_PPV_ARGS(device_factory.put()));
|
|
if (FAILED(hr))
|
|
{
|
|
Console.ErrorFmt("D3D12: Unable to load provided Agility SDK {:08X}", hr);
|
|
return;
|
|
}
|
|
|
|
// Windows 10 (and older drivers on 11) will apply to the global state in ID3D12DeviceFactory::CreateDevice().
|
|
// To get consistant behaviour across all systems, always apply the global state.
|
|
// This also allows us to use the normal D3D12*() methods with the loaded agility SDK.
|
|
hr = device_factory->ApplyToGlobalState();
|
|
if (FAILED(hr))
|
|
{
|
|
Console.ErrorFmt("D3D12: Unable to apply provided Agility SDK {:08X}", hr);
|
|
return;
|
|
}
|
|
|
|
agility_loaded = true;
|
|
}
|
|
|
|
// Creates the DXGI factory, selects the adapter, and creates the D3D12 device together with the
// command queue, memory allocator, fence and fence event.
// vendor_id receives the vendor ID of the adapter selected by name (see GetAdapterVendorID()).
// Returns false on any fatal creation failure; a missing debug layer is not fatal.
bool GSDevice12::CreateDevice(u32& vendor_id)
{
	bool enable_debug_layer = GSConfig.UseDebugDevice;

	m_dxgi_factory = D3D::CreateFactory(GSConfig.UseDebugDevice);
	if (!m_dxgi_factory)
		return false;

	m_adapter = D3D::GetAdapterByName(m_dxgi_factory.get(), GSConfig.Adapter);
	vendor_id = GetAdapterVendorID();

	HRESULT hr;

	// Load the Agility SDK before anything else touches D3D12.
	LoadAgilitySDK();

	// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
	if (enable_debug_layer)
	{
		ComPtr<ID3D12Debug1> debug12;
		hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.put()));
		if (FAILED(hr))
		{
			Console.Error("D3D12: Debug layer requested but not available.");
			enable_debug_layer = false;
		}
		else
		{
			debug12->EnableDebugLayer();
			debug12->SetEnableGPUBasedValidation(true);
		}
	}

	// Intel Haswell doesn't actually support DX12 even though the device is created, which results in a crash.
	// To get around this, require feature level 12 (Skylake+) on Intel adapters.
	const bool isIntel = (vendor_id == 0x163C || vendor_id == 0x8086 || vendor_id == 0x8087);
	const D3D_FEATURE_LEVEL minimum_feature_level = isIntel ? D3D_FEATURE_LEVEL_12_0 : D3D_FEATURE_LEVEL_11_0;

	// Create the actual device.
	hr = D3D12CreateDevice(m_adapter.get(), minimum_feature_level, IID_PPV_ARGS(&m_device));
	if (FAILED(hr))
	{
		Console.Error("D3D12: Failed to create device: %08X", hr);
		return false;
	}

	// No adapter was selected by name: look up the one the device ended up on.
	if (!m_adapter)
	{
		const LUID luid(m_device->GetAdapterLuid());
		if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.put()))))
			Console.Error("D3D12: Failed to get lookup adapter by device LUID");
	}

	// Stays 0 when the device does not expose ID3D12DeviceConfiguration.
	int sdkVersion = 0;
	ComPtr<ID3D12DeviceConfiguration> config = m_device.try_query<ID3D12DeviceConfiguration>();
	if (config)
	{
		sdkVersion = config->GetDesc().SDKVersion;
		Console.WriteLnFmt("D3D12: Agility version: {}", sdkVersion);
	}

	if (enable_debug_layer)
	{
		ComPtr<ID3D12InfoQueue> info_queue = m_device.try_query<ID3D12InfoQueue>();
		if (info_queue)
		{
			if (IsDebuggerPresent())
			{
				info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
				info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
			}

			std::array<D3D12_MESSAGE_ID, 6> denied_ids{
				D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
				D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,
				D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,
				D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH,
				D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE,
				// The current OS version of D3D12 (616) has a validation bug.
				// This is fixed with Agility 1.618.4.
				// For now, disable this warning until the OS updates.
				// Must stay last: it is dropped from the list for SDK versions >= 618.
				D3D12_MESSAGE_ID_INCOMPATIBLE_BARRIER_LAYOUT,
			};
			const size_t denied_count = (sdkVersion < 618) ? denied_ids.size() : denied_ids.size() - 1;

			D3D12_INFO_QUEUE_FILTER filter = {};
			filter.DenyList.NumIDs = static_cast<UINT>(denied_count);
			filter.DenyList.pIDList = denied_ids.data();
			info_queue->PushStorageFilter(&filter);
		}
	}

	const D3D12_COMMAND_QUEUE_DESC queue_desc = {
		D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, D3D12_COMMAND_QUEUE_FLAG_NONE};
	hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue));
	if (FAILED(hr))
	{
		Console.Error("D3D12: Failed to create command queue: %08X", hr);
		return false;
	}

	D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
	allocatorDesc.pDevice = m_device.get();
	allocatorDesc.pAdapter = m_adapter.get();
	allocatorDesc.Flags =
		D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED |
		D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/;

	hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.put());
	if (FAILED(hr))
	{
		Console.Error("D3D12: CreateAllocator() failed with HRESULT %08X", hr);
		return false;
	}

	hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence));
	if (FAILED(hr))
	{
		Console.Error("D3D12: Failed to create fence: %08X", hr);
		return false;
	}

	// Auto-reset, initially non-signalled event used by WaitForFence().
	m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
	if (m_fence_event == NULL)
	{
		Console.Error("D3D12: Failed to create fence event: %08X", GetLastError());
		return false;
	}

	return true;
}
|
|
|
|
bool GSDevice12::CreateDescriptorHeaps()
|
|
{
|
|
static constexpr size_t MAX_SRVS = 32768;
|
|
static constexpr size_t MAX_RTVS = 16384;
|
|
static constexpr size_t MAX_DSVS = 16384;
|
|
static constexpr size_t MAX_CPU_SAMPLERS = 1024;
|
|
|
|
if (!m_descriptor_heap_manager.Create(m_device.get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MAX_SRVS, false) ||
|
|
!m_rtv_heap_manager.Create(m_device.get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_RTVS, false) ||
|
|
!m_dsv_heap_manager.Create(m_device.get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_DSVS, false) ||
|
|
!m_sampler_heap_manager.Create(m_device.get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_CPU_SAMPLERS, false))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// Allocate null SRV descriptor for unbound textures.
|
|
constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {
|
|
DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
|
|
|
|
if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor))
|
|
{
|
|
pxFailRel("Failed to allocate null descriptor");
|
|
return false;
|
|
}
|
|
|
|
m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle);
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CreateCommandLists()
|
|
{
|
|
static constexpr size_t MAX_GPU_SRVS = 32768;
|
|
static constexpr size_t MAX_GPU_SAMPLERS = 2048;
|
|
|
|
for (u32 i = 0; i < NUM_COMMAND_LISTS; i++)
|
|
{
|
|
CommandListResources& res = m_command_lists[i];
|
|
HRESULT hr;
|
|
|
|
for (u32 i = 0; i < 2; i++)
|
|
{
|
|
hr = m_device->CreateCommandAllocator(
|
|
D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(res.command_allocators[i].put()));
|
|
pxAssertRel(SUCCEEDED(hr), "Create command allocator");
|
|
if (FAILED(hr))
|
|
return false;
|
|
|
|
if (m_enhanced_barriers)
|
|
{
|
|
hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(),
|
|
nullptr, IID_PPV_ARGS(res.command_lists[i].list7.put()));
|
|
res.command_lists[i].list4 = res.command_lists[i].list7;
|
|
}
|
|
else
|
|
{
|
|
hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(),
|
|
nullptr, IID_PPV_ARGS(res.command_lists[i].list4.put()));
|
|
}
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D12: Failed to create command list: %08X", hr);
|
|
return false;
|
|
}
|
|
|
|
// Close the command lists, since the first thing we do is reset them.
|
|
hr = res.command_lists[i].list4->Close();
|
|
pxAssertRel(SUCCEEDED(hr), "Closing new command list failed");
|
|
if (FAILED(hr))
|
|
return false;
|
|
}
|
|
|
|
if (!res.descriptor_allocator.Create(m_device.get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MAX_GPU_SRVS))
|
|
{
|
|
Console.Error("D3D12: Failed to create per frame descriptor allocator");
|
|
return false;
|
|
}
|
|
|
|
if (!res.sampler_allocator.Create(m_device.get(), MAX_GPU_SAMPLERS))
|
|
{
|
|
Console.Error("D3D12: Failed to create per frame sampler allocator");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
MoveToNextCommandList();
|
|
return true;
|
|
}
|
|
|
|
void GSDevice12::MoveToNextCommandList()
|
|
{
|
|
const int next_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
|
|
|
|
// We may have to wait if this command list hasn't finished on the GPU.
|
|
CommandListResources& res = m_command_lists[next_command_list];
|
|
WaitForFence(res.ready_fence_value, false);
|
|
|
|
m_current_command_list = next_command_list;
|
|
m_current_fence_value++;
|
|
res.ready_fence_value = m_current_fence_value;
|
|
res.init_command_list_used = false;
|
|
|
|
// Begin command list.
|
|
res.command_allocators[1]->Reset();
|
|
res.command_lists[1].list4->Reset(res.command_allocators[1].get(), nullptr);
|
|
res.descriptor_allocator.Reset();
|
|
if (res.sampler_allocator.ShouldReset())
|
|
res.sampler_allocator.Reset();
|
|
|
|
if (res.has_timestamp_query)
|
|
{
|
|
// readback timestamp from the last time this cmdlist was used.
|
|
// we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist.
|
|
const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
|
|
const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)};
|
|
void* map;
|
|
HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
u64 timestamps[2];
|
|
std::memcpy(timestamps, static_cast<const u8*>(map) + offset, sizeof(timestamps));
|
|
m_accumulated_gpu_time +=
|
|
static_cast<float>(static_cast<double>(timestamps[1] - timestamps[0]) / m_timestamp_frequency);
|
|
|
|
const D3D12_RANGE write_range = {};
|
|
m_timestamp_query_buffer->Unmap(0, &write_range);
|
|
}
|
|
else
|
|
{
|
|
Console.Warning("D3D12: Map() for timestamp query failed: %08X", hr);
|
|
}
|
|
}
|
|
|
|
res.has_timestamp_query = m_gpu_timing_enabled;
|
|
if (m_gpu_timing_enabled)
|
|
{
|
|
res.command_lists[1].list4->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
|
|
m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST);
|
|
}
|
|
|
|
ID3D12DescriptorHeap* heaps[2] = {
|
|
res.descriptor_allocator.GetDescriptorHeap(), res.sampler_allocator.GetDescriptorHeap()};
|
|
res.command_lists[1].list4->SetDescriptorHeaps(std::size(heaps), heaps);
|
|
|
|
m_allocator->SetCurrentFrameIndex(static_cast<UINT>(m_current_fence_value));
|
|
}
|
|
|
|
// Returns the init command list (index 0) of the current slot, opening it for recording on
// first use: its allocator and the list itself are reset and the slot is flagged so that
// ExecuteCommandList() closes and submits the init list ahead of the main list.
const D3D12CommandList& GSDevice12::GetInitCommandList()
{
	CommandListResources& res = m_command_lists[m_current_command_list];
	if (!res.init_command_list_used)
	{
		[[maybe_unused]] HRESULT hr = res.command_allocators[0]->Reset();
		pxAssertMsg(SUCCEEDED(hr), "Reset init command allocator failed");

		// Capture the list reset result so the assert below checks this call rather than
		// re-checking the allocator reset.
		hr = res.command_lists[0].list4->Reset(res.command_allocators[0].get(), nullptr);
		pxAssertMsg(SUCCEEDED(hr), "Reset init command list failed");
		res.init_command_list_used = true;
	}

	return res.command_lists[0];
}
|
|
|
|
// Closes and submits the current slot's command lists (the init list first, when it was used),
// signals the slot's fence value on the queue, and moves on to the next slot.
// When wait_for_completion is not WaitType::None, waits for the submitted work afterwards,
// spinning for WaitType::Spin. Returns false if closing either command list fails.
bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion)
{
	CommandListResources& res = m_command_lists[m_current_command_list];
	auto& init_list = res.command_lists[0];
	auto& main_list = res.command_lists[1];
	HRESULT hr;

	if (res.has_timestamp_query)
	{
		// Write the end timestamp, then resolve both timestamps into the readback buffer.
		const auto first_query = m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST;
		main_list.list4->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP, first_query + 1);
		main_list.list4->ResolveQueryData(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
			first_query, NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.get(),
			m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
	}

	const bool submit_init_list = res.init_command_list_used;
	if (submit_init_list)
	{
		hr = init_list.list4->Close();
		if (FAILED(hr))
		{
			Console.Error("D3D12: Closing init command list failed with HRESULT %08X", hr);
			return false;
		}
	}

	// Close and queue command list.
	hr = main_list.list4->Close();
	if (FAILED(hr))
	{
		Console.Error("D3D12: Closing main command list failed with HRESULT %08X", hr);
		return false;
	}

	// The init list, when present, always runs ahead of the main list.
	ID3D12CommandList* execute_lists[2];
	UINT num_lists = 0;
	if (submit_init_list)
		execute_lists[num_lists++] = init_list.list4.get();
	execute_lists[num_lists++] = main_list.list4.get();
	m_command_queue->ExecuteCommandLists(num_lists, execute_lists);

	// Update fence when GPU has completed.
	hr = m_command_queue->Signal(m_fence.get(), res.ready_fence_value);
	pxAssertRel(SUCCEEDED(hr), "Signal fence");

	MoveToNextCommandList();

	// `res` still refers to the slot that was just submitted.
	if (wait_for_completion != WaitType::None)
		WaitForFence(res.ready_fence_value, wait_for_completion == WaitType::Spin);

	return true;
}
|
|
|
|
void GSDevice12::InvalidateSamplerGroups()
|
|
{
|
|
for (CommandListResources& res : m_command_lists)
|
|
res.sampler_allocator.InvalidateCache();
|
|
}
|
|
|
|
// Takes an extra reference on the object and queues it on the current command list slot;
// the reference is dropped in DestroyPendingResources(). A null object is ignored.
void GSDevice12::DeferObjectDestruction(ID3D12DeviceChild* resource)
{
	if (resource)
	{
		resource->AddRef();

		// No allocation is associated with a plain device child.
		auto& pending = m_command_lists[m_current_command_list].pending_resources;
		pending.emplace_back(nullptr, resource);
	}
}
|
|
|
|
// Takes an extra reference on the resource (and on its allocation, when one is given) and
// queues the pair on the current command list slot; the references are dropped in
// DestroyPendingResources(). Nothing happens when the resource is null.
void GSDevice12::DeferResourceDestruction(D3D12MA::Allocation* allocation, ID3D12Resource* resource)
{
	if (resource)
	{
		if (allocation)
			allocation->AddRef();
		resource->AddRef();

		auto& pending = m_command_lists[m_current_command_list].pending_resources;
		pending.emplace_back(allocation, resource);
	}
}
|
|
|
|
// Queues a descriptor index on the current command list slot; it is handed back to the
// manager in DestroyPendingResources().
void GSDevice12::DeferDescriptorDestruction(D3D12DescriptorHeapManager& manager, u32 index)
{
	auto& pending = m_command_lists[m_current_command_list].pending_descriptors;
	pending.emplace_back(manager, index);
}
|
|
|
|
// Handle variant: queues the handle's descriptor index on the current command list slot and
// clears the handle. Handles holding INVALID_INDEX are left untouched.
void GSDevice12::DeferDescriptorDestruction(D3D12DescriptorHeapManager& manager, D3D12DescriptorHandle* handle)
{
	if (handle->index != D3D12DescriptorHandle::INVALID_INDEX)
	{
		auto& pending = m_command_lists[m_current_command_list].pending_descriptors;
		pending.emplace_back(manager, handle->index);
		handle->Clear();
	}
}
|
|
|
|
// Flushes the deferred-destruction queues of a command list slot: frees every queued
// descriptor through its manager, then releases every queued resource followed by its
// allocation (when it has one). Both queues are empty afterwards.
void GSDevice12::DestroyPendingResources(CommandListResources& cmdlist)
{
	for (const auto& descriptor : cmdlist.pending_descriptors)
		descriptor.first.Free(descriptor.second);
	cmdlist.pending_descriptors.clear();

	for (const auto& entry : cmdlist.pending_resources)
	{
		// first = allocation (may be null), second = resource/object.
		entry.second->Release();
		if (entry.first)
			entry.first->Release();
	}
	cmdlist.pending_resources.clear();
}
|
|
|
|
// Blocks until the fence has reached at least `fence`, either by spinning on the completed
// value (spin == true) or by waiting on the fence event. Returns immediately when the cached
// completed value already covers it. Afterwards, the deferred resources of every completed
// slot are destroyed, starting from the slot after the current one and stopping at the first
// slot that has not completed.
void GSDevice12::WaitForFence(u64 fence, bool spin)
{
	if (fence <= m_completed_fence_value)
		return;

	if (!spin)
	{
		// Try non-blocking check.
		m_completed_fence_value = m_fence->GetCompletedValue();
		if (m_completed_fence_value < fence)
		{
			// Fall back to event.
			HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event);
			pxAssertRel(SUCCEEDED(hr), "Set fence event on completion");
			WaitForSingleObject(m_fence_event, INFINITE);
			m_completed_fence_value = m_fence->GetCompletedValue();
		}
	}
	else
	{
		u64 completed = m_fence->GetCompletedValue();
		while (completed < fence)
		{
			ShortSpin();
			completed = m_fence->GetCompletedValue();
		}
		m_completed_fence_value = completed;
	}

	// Release resources for as many command lists which have completed.
	for (u32 visited = 0, slot = (m_current_command_list + 1) % NUM_COMMAND_LISTS; visited < NUM_COMMAND_LISTS;
		 visited++, slot = (slot + 1) % NUM_COMMAND_LISTS)
	{
		CommandListResources& res = m_command_lists[slot];
		if (m_completed_fence_value < res.ready_fence_value)
			break;

		DestroyPendingResources(res);
	}
}
|
|
|
|
void GSDevice12::WaitForGPUIdle()
|
|
{
|
|
u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
|
|
for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++)
|
|
{
|
|
WaitForFence(m_command_lists[index].ready_fence_value, false);
|
|
index = (index + 1) % NUM_COMMAND_LISTS;
|
|
}
|
|
}
|
|
|
|
bool GSDevice12::CreateTimestampQuery()
|
|
{
|
|
constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS;
|
|
constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT;
|
|
|
|
const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT};
|
|
HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.put()));
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D12: CreateQueryHeap() for timestamp failed with %08X", hr);
|
|
return false;
|
|
}
|
|
|
|
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK};
|
|
const D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, BUFFER_SIZE, 1, 1, 1,
|
|
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};
|
|
if (m_enhanced_barriers)
|
|
hr = m_allocator->CreateResource3(&allocation_desc, &resource_desc.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr,
|
|
0, nullptr, m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put()));
|
|
else
|
|
hr = m_allocator->CreateResource(&allocation_desc, &resource_desc.desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
|
|
m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put()));
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D12: CreateResource() for timestamp failed with %08X", hr);
|
|
return false;
|
|
}
|
|
|
|
u64 frequency;
|
|
hr = m_command_queue->GetTimestampFrequency(&frequency);
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D12: GetTimestampFrequency() failed: %08X", hr);
|
|
return false;
|
|
}
|
|
|
|
m_timestamp_frequency = static_cast<double>(frequency) / 1000.0;
|
|
return true;
|
|
}
|
|
|
|
float GSDevice12::GetAndResetAccumulatedGPUTime()
|
|
{
|
|
const float time = m_accumulated_gpu_time;
|
|
m_accumulated_gpu_time = 0.0f;
|
|
return time;
|
|
}
|
|
|
|
bool GSDevice12::SetGPUTimingEnabled(bool enabled)
|
|
{
|
|
m_gpu_timing_enabled = enabled;
|
|
return true;
|
|
}
|
|
|
|
// Creates a default-heap buffer of `size` bytes whose contents are produced by fill_callback.
// The callback writes into a mapped upload-heap staging buffer; a copy into the GPU buffer,
// followed by a barrier making it usable as an index buffer, is recorded on the init command
// list. The staging buffer is queued for deferred destruction.
// gpu_buffer / gpu_allocation receive the created resource and its allocation.
// Returns false if either allocation or the map fails.
bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
	D3D12MA::Allocation** gpu_allocation, const std::function<void(void*)>& fill_callback)
{
	// Try to place the fixed index buffer in GPU local memory.
	// Use the staging buffer to copy into it.
	const D3D12_RESOURCE_DESCU rd = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0},
		D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};

	// --- Staging (upload heap) buffer ---
	const D3D12MA::ALLOCATION_DESC staging_ad = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};

	ComPtr<ID3D12Resource> staging_buffer;
	ComPtr<D3D12MA::Allocation> staging_allocation;
	HRESULT hr;
	if (m_enhanced_barriers)
	{
		hr = m_allocator->CreateResource3(&staging_ad, &rd.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr,
			staging_allocation.put(), IID_PPV_ARGS(staging_buffer.put()));
	}
	else
	{
		hr = m_allocator->CreateResource(&staging_ad, &rd.desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
			staging_allocation.put(), IID_PPV_ARGS(staging_buffer.put()));
	}
	pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
	if (FAILED(hr))
		return false;

	// The CPU reads nothing and writes the whole buffer.
	static constexpr const D3D12_RANGE read_range = {};
	const D3D12_RANGE write_range = {0, size};
	void* mapped;
	hr = staging_buffer->Map(0, &read_range, &mapped);
	pxAssertMsg(SUCCEEDED(hr), "Map CPU buffer");
	if (FAILED(hr))
		return false;
	fill_callback(mapped);
	staging_buffer->Unmap(0, &write_range);

	// --- Destination (default heap) buffer ---
	const D3D12MA::ALLOCATION_DESC gpu_ad = {D3D12MA::ALLOCATION_FLAG_COMMITTED, D3D12_HEAP_TYPE_DEFAULT};
	if (m_enhanced_barriers)
	{
		hr = m_allocator->CreateResource3(&gpu_ad, &rd.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr,
			gpu_allocation, IID_PPV_ARGS(gpu_buffer));
	}
	else
	{
		hr = m_allocator->CreateResource(
			&gpu_ad, &rd.desc, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
	}
	pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
	if (FAILED(hr))
		return false;

	// --- Upload and transition, recorded on the init command list ---
	const D3D12CommandList& init_list = GetInitCommandList();
	init_list.list4->CopyBufferRegion(*gpu_buffer, 0, staging_buffer.get(), 0, size);

	if (m_enhanced_barriers)
	{
		const D3D12_BUFFER_BARRIER barrier = {D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_INDEX_INPUT,
			D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDEX_BUFFER, *gpu_buffer, 0, size};
		const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_BUFFER, .NumBarriers = 1, .pBufferBarriers = &barrier};
		init_list.list7->Barrier(1, &group);
	}
	else
	{
		D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
		rb.Transition.pResource = *gpu_buffer;
		rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
		rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use.
		rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
		init_list.list4->ResourceBarrier(1, &rb);
	}

	// Keep the staging buffer alive until the current command list slot has completed.
	DeferResourceDestruction(staging_allocation.get(), staging_buffer.get());
	return true;
}
|
|
|
|
// This device always reports the D3D12 render API.
RenderAPI GSDevice12::GetRenderAPI() const
{
	return RenderAPI::D3D12;
}
|
|
|
|
bool GSDevice12::HasSurface() const
|
|
{
|
|
return static_cast<bool>(m_swap_chain);
|
|
}
|
|
|
|
// Full device bring-up: base class creation, D3D12 device and feature check, descriptor heaps,
// command lists, timestamp queries, window/swap chain, null texture, TFX shader source, shader
// cache, root signatures, buffers and all utility pipelines, followed by initial state and
// sampler set-up. Returns false at the first failing step; a shader cache that fails to open
// only produces a warning.
bool GSDevice12::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
{
	if (!GSDevice::Create(vsync_mode, allow_present_throttle))
		return false;

	u32 vendor_id = 0;
	if (!CreateDevice(vendor_id))
		return false;

	if (!CheckFeatures(vendor_id))
	{
		Console.Error("D3D12: Your GPU does not support the required D3D12 features.");
		return false;
	}

	m_name = D3D::GetAdapterName(m_adapter.get());

	// Core per-device objects, created in this order.
	if (!CreateDescriptorHeaps())
		return false;
	if (!CreateCommandLists())
		return false;
	if (!CreateTimestampQuery())
		return false;

	// A swap chain is only created when the window is not surfaceless.
	if (!AcquireWindow(true))
		return false;
	if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain())
		return false;

	if (!CreateNullTexture())
	{
		Host::ReportErrorAsync("GS", "Failed to create dummy texture");
		return false;
	}

	{
		std::optional<std::string> tfx_source = ReadShaderSource("shaders/dx11/tfx.fx");
		if (!tfx_source.has_value())
		{
			Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/tfx.fx.");
			return false;
		}

		m_tfx_source = std::move(*tfx_source);
	}

	if (!m_shader_cache.Open(m_feature_level, GSConfig.UseDebugDevice))
		Console.Warning("D3D12: Shader cache failed to open.");

	if (!CreateRootSignatures())
	{
		Host::ReportErrorAsync("GS", "Failed to create pipeline layouts");
		return false;
	}

	if (!CreateBuffers())
		return false;

	const bool utility_pipelines_ok = CompileConvertPipelines() && CompilePresentPipelines() &&
									  CompileInterlacePipelines() && CompileMergePipelines() &&
									  CompilePostProcessingPipelines();
	if (!utility_pipelines_ok)
	{
		Host::ReportErrorAsync("GS", "Failed to compile utility pipelines");
		return false;
	}

	if (!CompileCASPipelines())
		return false;

	if (!CompileImGuiPipeline())
		return false;

	InitializeState();
	InitializeSamplers();

	// Reference stencil isn't tied to pipeline, so we can set it once and leave it.
	SetStencilRef(1);
	return true;
}
|
|
|
|
void GSDevice12::Destroy()
|
|
{
|
|
GSDevice::Destroy();
|
|
|
|
if (GetCommandList().list4)
|
|
{
|
|
EndRenderPass();
|
|
ExecuteCommandList(true);
|
|
}
|
|
|
|
DestroySwapChain();
|
|
DestroyResources();
|
|
}
|
|
|
|
// Applies a new vsync mode and present-throttle setting. Mailbox is downgraded to FIFO while
// in exclusive fullscreen. The swap chain is recreated only when one exists and the new mode
// needs a different buffer count.
void GSDevice12::SetVSyncMode(GSVSyncMode mode, bool allow_present_throttle)
{
	m_allow_present_throttle = allow_present_throttle;

	// Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
	if (mode == GSVSyncMode::Mailbox && m_is_exclusive_fullscreen)
	{
		WARNING_LOG("D3D12: Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
		mode = GSVSyncMode::FIFO;
	}

	if (mode == m_vsync_mode)
		return;

	// The buffer count depends on the vsync mode, so sample it before switching.
	const u32 previous_buffer_count = GetSwapChainBufferCount();
	m_vsync_mode = mode;
	if (!m_swap_chain)
		return;

	if (GetSwapChainBufferCount() == previous_buffer_count)
		return;

	DestroySwapChain();
	if (!CreateSwapChain())
	{
		pxFailRel("Failed to recreate swap chain after vsync change.");
	}
}
|
|
|
|
// Returns how many back buffers the swap chain should have for the current vsync mode.
u32 GSDevice12::GetSwapChainBufferCount() const
{
	// Mailbox is the triple-buffered mode and therefore needs three buffers.
	if (m_vsync_mode == GSVSyncMode::Mailbox)
		return 3;

	// Vsync off and blocking vsync both get by with two.
	return 2;
}
|
|
|
|
bool GSDevice12::CreateSwapChain()
|
|
{
|
|
constexpr DXGI_FORMAT swap_chain_format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
|
|
if (m_window_info.type != WindowInfo::Type::Win32)
|
|
return false;
|
|
|
|
const HWND window_hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
|
|
RECT client_rc{};
|
|
GetClientRect(window_hwnd, &client_rc);
|
|
|
|
DXGI_MODE_DESC fullscreen_mode;
|
|
wil::com_ptr_nothrow<IDXGIOutput> fullscreen_output;
|
|
if (Host::IsFullscreen())
|
|
{
|
|
u32 fullscreen_width, fullscreen_height;
|
|
float fullscreen_refresh_rate;
|
|
m_is_exclusive_fullscreen =
|
|
GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) &&
|
|
D3D::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.get(), window_hwnd, fullscreen_width,
|
|
fullscreen_height, fullscreen_refresh_rate, swap_chain_format, &fullscreen_mode,
|
|
fullscreen_output.put());
|
|
|
|
// Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
|
|
if (m_vsync_mode == GSVSyncMode::Mailbox && m_is_exclusive_fullscreen)
|
|
{
|
|
WARNING_LOG("D3D12: Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
|
|
m_vsync_mode = GSVSyncMode::FIFO;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
m_is_exclusive_fullscreen = false;
|
|
}
|
|
|
|
DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
|
|
swap_chain_desc.Width = static_cast<u32>(client_rc.right - client_rc.left);
|
|
swap_chain_desc.Height = static_cast<u32>(client_rc.bottom - client_rc.top);
|
|
swap_chain_desc.Format = swap_chain_format;
|
|
swap_chain_desc.SampleDesc.Count = 1;
|
|
swap_chain_desc.BufferCount = GetSwapChainBufferCount();
|
|
swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
|
swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
|
|
|
m_using_allow_tearing = (m_allow_tearing_supported && !m_is_exclusive_fullscreen);
|
|
if (m_using_allow_tearing)
|
|
swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
if (m_is_exclusive_fullscreen)
|
|
{
|
|
DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc;
|
|
DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {};
|
|
|
|
fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
|
|
fs_sd_desc.Width = fullscreen_mode.Width;
|
|
fs_sd_desc.Height = fullscreen_mode.Height;
|
|
fs_desc.RefreshRate = fullscreen_mode.RefreshRate;
|
|
fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering;
|
|
fs_desc.Scaling = fullscreen_mode.Scaling;
|
|
fs_desc.Windowed = FALSE;
|
|
|
|
Console.WriteLn("D3D12: Creating a %dx%d exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height);
|
|
hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.get(), window_hwnd, &fs_sd_desc,
|
|
&fs_desc, fullscreen_output.get(), m_swap_chain.put());
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Warning("D3D12: Failed to create fullscreen swap chain, trying windowed.");
|
|
m_is_exclusive_fullscreen = false;
|
|
m_using_allow_tearing = m_allow_tearing_supported;
|
|
}
|
|
}
|
|
|
|
if (!m_is_exclusive_fullscreen)
|
|
{
|
|
Console.WriteLn("D3D12: Creating a %dx%d windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height);
|
|
hr = m_dxgi_factory->CreateSwapChainForHwnd(
|
|
m_command_queue.get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, m_swap_chain.put());
|
|
|
|
if (FAILED(hr))
|
|
Console.Warning("D3D12: Failed to create windowed swap chain.");
|
|
}
|
|
|
|
// MWA needs to be called on the correct factory.
|
|
wil::com_ptr_nothrow<IDXGIFactory> swap_chain_factory;
|
|
hr = m_swap_chain->GetParent(IID_PPV_ARGS(swap_chain_factory.put()));
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = swap_chain_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
|
|
if (FAILED(hr))
|
|
Console.ErrorFmt("D3D12: MakeWindowAssociation() to disable ALT+ENTER failed: {}", Error::CreateHResult(hr).GetDescription());
|
|
}
|
|
else
|
|
{
|
|
Console.ErrorFmt("D3D12: GetParent() on swap chain to get factory failed: {}", Error::CreateHResult(hr).GetDescription());
|
|
}
|
|
|
|
if (!CreateSwapChainRTV())
|
|
{
|
|
DestroySwapChain();
|
|
return false;
|
|
}
|
|
|
|
// Render a frame as soon as possible to clear out whatever was previously being displayed.
|
|
EndRenderPass();
|
|
GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get();
|
|
const D3D12CommandList& cmdlist = GetCommandList();
|
|
m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
|
|
swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::RenderTarget);
|
|
cmdlist.list4->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr);
|
|
swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::Present);
|
|
ExecuteCommandList(false);
|
|
m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CreateSwapChainRTV()
|
|
{
|
|
DXGI_SWAP_CHAIN_DESC swap_chain_desc;
|
|
HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc);
|
|
if (FAILED(hr))
|
|
return false;
|
|
|
|
for (u32 i = 0; i < swap_chain_desc.BufferCount; i++)
|
|
{
|
|
ComPtr<ID3D12Resource> backbuffer;
|
|
hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.put()));
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D12: GetBuffer for RTV failed: 0x%08X", hr);
|
|
m_swap_chain_buffers.clear();
|
|
return false;
|
|
}
|
|
|
|
std::unique_ptr<GSTexture12> tex = GSTexture12::Adopt(std::move(backbuffer), GSTexture::Type::RenderTarget,
|
|
GSTexture::Format::Color, swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, 1,
|
|
swap_chain_desc.BufferDesc.Format, DXGI_FORMAT_UNKNOWN, swap_chain_desc.BufferDesc.Format,
|
|
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, GSTexture12::ResourceState::Present);
|
|
if (!tex)
|
|
{
|
|
m_swap_chain_buffers.clear();
|
|
return false;
|
|
}
|
|
|
|
m_swap_chain_buffers.push_back(std::move(tex));
|
|
}
|
|
|
|
m_window_info.surface_width = swap_chain_desc.BufferDesc.Width;
|
|
m_window_info.surface_height = swap_chain_desc.BufferDesc.Height;
|
|
DevCon.WriteLn("D3D12: Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height);
|
|
|
|
if (m_window_info.type == WindowInfo::Type::Win32)
|
|
{
|
|
BOOL fullscreen = FALSE;
|
|
DXGI_SWAP_CHAIN_DESC desc;
|
|
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen &&
|
|
SUCCEEDED(m_swap_chain->GetDesc(&desc)))
|
|
{
|
|
m_window_info.surface_refresh_rate = static_cast<float>(desc.BufferDesc.RefreshRate.Numerator) /
|
|
static_cast<float>(desc.BufferDesc.RefreshRate.Denominator);
|
|
}
|
|
}
|
|
|
|
m_current_swap_chain_buffer = 0;
|
|
return true;
|
|
}
|
|
|
|
void GSDevice12::DestroySwapChainRTVs()
|
|
{
|
|
for (std::unique_ptr<GSTexture12>& buffer : m_swap_chain_buffers)
|
|
buffer->Destroy(false);
|
|
m_swap_chain_buffers.clear();
|
|
m_current_swap_chain_buffer = 0;
|
|
}
|
|
|
|
void GSDevice12::DestroySwapChain()
|
|
{
|
|
if (!m_swap_chain)
|
|
return;
|
|
|
|
DestroySwapChainRTVs();
|
|
|
|
// switch out of fullscreen before destroying
|
|
BOOL is_fullscreen;
|
|
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen)
|
|
m_swap_chain->SetFullscreenState(FALSE, nullptr);
|
|
|
|
m_swap_chain.reset();
|
|
m_is_exclusive_fullscreen = false;
|
|
}
|
|
|
|
bool GSDevice12::UpdateWindow()
|
|
{
|
|
ExecuteCommandList(true);
|
|
DestroySwapChain();
|
|
|
|
if (!AcquireWindow(false))
|
|
return false;
|
|
|
|
if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain())
|
|
{
|
|
Console.WriteLn("D3D12: Failed to create swap chain on updated window");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void GSDevice12::DestroySurface()
|
|
{
|
|
ExecuteCommandList(true);
|
|
DestroySwapChain();
|
|
}
|
|
|
|
std::string GSDevice12::GetDriverInfo() const
|
|
{
|
|
std::string ret = "Unknown Feature Level";
|
|
|
|
static constexpr std::array<std::tuple<D3D_FEATURE_LEVEL, const char*>, 2> feature_level_names = {{
|
|
{D3D_FEATURE_LEVEL_11_0, "D3D_FEATURE_LEVEL_11_0"},
|
|
{D3D_FEATURE_LEVEL_11_1, "D3D_FEATURE_LEVEL_11_1"},
|
|
}};
|
|
|
|
for (size_t i = 0; i < std::size(feature_level_names); i++)
|
|
{
|
|
if (m_feature_level == std::get<0>(feature_level_names[i]))
|
|
{
|
|
ret = std::get<1>(feature_level_names[i]);
|
|
break;
|
|
}
|
|
}
|
|
|
|
ret += "\n";
|
|
|
|
DXGI_ADAPTER_DESC desc;
|
|
if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc)))
|
|
{
|
|
ret += StringUtil::StdStringFromFormat("VID: 0x%04X PID: 0x%04X\n", desc.VendorId, desc.DeviceId);
|
|
ret += StringUtil::WideStringToUTF8String(desc.Description);
|
|
ret += "\n";
|
|
|
|
const std::string driver_version(D3D::GetDriverVersionFromLUID(desc.AdapterLuid));
|
|
if (!driver_version.empty())
|
|
{
|
|
ret += "Driver Version: ";
|
|
ret += driver_version;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Reacts to a host window resize. The surface scale is always updated; the swap chain
// buffers are only resized when the reported size differs from the current surface size.
void GSDevice12::ResizeWindow(u32 new_window_width, u32 new_window_height, float new_window_scale)
{
	if (!m_swap_chain)
		return;

	m_window_info.surface_scale = new_window_scale;

	const bool same_width = (m_window_info.surface_width == new_window_width);
	const bool same_height = (m_window_info.surface_height == new_window_height);
	if (same_width && same_height)
		return;

	// The wrapped back buffers have to go before the swap chain can resize them.
	ExecuteCommandList(true);
	DestroySwapChainRTVs();

	// Zero width/height/count and DXGI_FORMAT_UNKNOWN let DXGI keep the existing count/format
	// and size the buffers from the window's client area; the tearing flag must match creation.
	const UINT resize_flags = m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0;
	const HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, resize_flags);
	if (FAILED(hr))
		Console.Error("D3D12: ResizeBuffers() failed: 0x%08X", hr);

	if (!CreateSwapChainRTV())
		pxFailRel("Failed to recreate swap chain RTV after resize");
}
|
|
|
|
bool GSDevice12::SupportsExclusiveFullscreen() const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// Prepares the current back buffer for presentation: ends the open render pass, then
// clears and binds the back buffer with a full-surface viewport and scissor.
//
// Returns DeviceLost if the device was flagged as lost, FrameSkipped when the frame is
// skipped, there is no swap chain, or exclusive fullscreen was lost; OK otherwise.
GSDevice::PresentResult GSDevice12::BeginPresent(bool frame_skip)
{
	EndRenderPass();

	if (m_device_lost)
		return PresentResult::DeviceLost;

	// Without a swap chain the recorded work is still submitted, but nothing is shown.
	if (!m_swap_chain)
	{
		ExecuteCommandList(WaitType::None);
		InvalidateCachedState();
		return PresentResult::FrameSkipped;
	}

	if (frame_skip)
		return PresentResult::FrameSkipped;

	// Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
	// This might get called repeatedly if it takes a while to switch back, that's the host's problem.
	if (m_is_exclusive_fullscreen)
	{
		BOOL still_fullscreen = FALSE;
		const HRESULT state_hr = m_swap_chain->GetFullscreenState(&still_fullscreen, nullptr);
		if (FAILED(state_hr) || !still_fullscreen)
		{
			Host::RunOnCPUThread([]() { Host::SetFullscreen(false); });
			return PresentResult::FrameSkipped;
		}
	}

	GSTexture12* const backbuffer = m_swap_chain_buffers[m_current_swap_chain_buffer].get();
	const D3D12CommandList& cmdlist = GetCommandList();

	// Clear the back buffer and make it the sole render target.
	backbuffer->TransitionToState(cmdlist, GSTexture12::ResourceState::RenderTarget);
	cmdlist.list4->ClearRenderTargetView(backbuffer->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr);
	cmdlist.list4->OMSetRenderTargets(1, &backbuffer->GetWriteDescriptor().cpu_handle, FALSE, nullptr);
	g_perfmon.Put(GSPerfMon::RenderPasses, 1);

	// Viewport and scissor both cover the whole surface.
	const float surface_w = static_cast<float>(m_window_info.surface_width);
	const float surface_h = static_cast<float>(m_window_info.surface_height);
	const D3D12_VIEWPORT vp{0.0f, 0.0f, surface_w, surface_h, 0.0f, 1.0f};
	const D3D12_RECT scissor{
		0, 0, static_cast<LONG>(m_window_info.surface_width), static_cast<LONG>(m_window_info.surface_height)};
	cmdlist.list4->RSSetViewports(1, &vp);
	cmdlist.list4->RSSetScissorRects(1, &scissor);
	return PresentResult::OK;
}
|
|
|
|
void GSDevice12::EndPresent()
|
|
{
|
|
RenderImGui();
|
|
|
|
GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get();
|
|
m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
|
|
|
|
swap_chain_buf->TransitionToState(GSTexture12::ResourceState::Present);
|
|
if (!ExecuteCommandList(WaitType::None))
|
|
{
|
|
m_device_lost = true;
|
|
InvalidateCachedState();
|
|
return;
|
|
}
|
|
|
|
const UINT sync_interval = static_cast<UINT>(m_vsync_mode == GSVSyncMode::FIFO);
|
|
const UINT flags = (m_vsync_mode == GSVSyncMode::Disabled && m_using_allow_tearing) ? DXGI_PRESENT_ALLOW_TEARING : 0;
|
|
m_swap_chain->Present(sync_interval, flags);
|
|
|
|
InvalidateCachedState();
|
|
}
|
|
|
|
#ifdef ENABLE_OGL_DEBUG
// Cosine colour palette: each channel is a + b * cos(2*pi * (c * phase + d)),
// scaled by 255 and packed into a PIX colour value.
static UINT Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
	const std::array<float, 3>& c, const std::array<float, 3>& d)
{
	constexpr float two_pi = 6.28318f;

	std::array<BYTE, 3> rgb;
	for (size_t ch = 0; ch < rgb.size(); ch++)
	{
		const float channel = a[ch] + b[ch] * std::cos(two_pi * (c[ch] * phase + d[ch]));
		rgb[ch] = static_cast<BYTE>(channel * 255.0f);
	}

	return PIX_COLOR(rgb[0], rgb[1], rgb[2]);
}
#endif
|
|
|
|
// Opens a named PIX event on the current command list (printf-style name).
// Compiled to a no-op without ENABLE_OGL_DEBUG and skipped unless the debug device is enabled.
void GSDevice12::PushDebugGroup(const char* fmt, ...)
{
#ifdef ENABLE_OGL_DEBUG
	if (!GSConfig.UseDebugDevice)
		return;

	// Format the caller's message into the event label.
	std::va_list args;
	va_start(args, fmt);
	const std::string label(StringUtil::StdStringFromFormatV(fmt, args));
	va_end(args);

	// The event colour is derived from the (incremented) nesting depth.
	s_debug_scope_depth++;
	const UINT color = Palette(
		s_debug_scope_depth, {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});

	PIXBeginEvent(GetCommandList().list4.get(), color, "%s", label.c_str());
#endif
}
|
|
|
|
// Closes the most recently opened PIX event on the current command list.
// Compiled to a no-op without ENABLE_OGL_DEBUG and skipped unless the debug device is enabled.
void GSDevice12::PopDebugGroup()
{
#ifdef ENABLE_OGL_DEBUG
	if (!GSConfig.UseDebugDevice)
		return;

	// Saturating decrement: the depth counter never wraps below zero.
	if (s_debug_scope_depth > 0)
		s_debug_scope_depth--;

	PIXEndEvent(GetCommandList().list4.get());
#endif
}
|
|
|
|
// Drops a coloured PIX marker with a printf-style message on the current command list.
// The colour is picked by message category; empty messages are not emitted.
// Compiled to a no-op without ENABLE_OGL_DEBUG and skipped unless the debug device is enabled.
void GSDevice12::InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...)
{
#ifdef ENABLE_OGL_DEBUG
	if (!GSConfig.UseDebugDevice)
		return;

	std::va_list args;
	va_start(args, fmt);
	const std::string message(StringUtil::StdStringFromFormatV(fmt, args));
	va_end(args);

	if (message.empty())
		return;

	// One RGB triple per category, indexed by the category's integer value.
	static constexpr float colors[][3] = {
		{0.1f, 0.1f, 0.0f}, // Cache
		{0.1f, 0.1f, 0.0f}, // Reg
		{0.5f, 0.0f, 0.5f}, // Debug
		{0.0f, 0.5f, 0.5f}, // Message
		{0.0f, 0.2f, 0.0f} // Performance
	};

	const float* const rgb = colors[static_cast<int>(category)];
	const BYTE red = static_cast<BYTE>(rgb[0] * 255.0f);
	const BYTE green = static_cast<BYTE>(rgb[1] * 255.0f);
	const BYTE blue = static_cast<BYTE>(rgb[2] * 255.0f);
	const UINT color = PIX_COLOR(red, green, blue);

	PIXSetMarker(GetCommandList().list4.get(), color, "%s", message.c_str());
#endif
}
|
|
|
|
bool GSDevice12::CheckFeatures(const u32& vendor_id)
|
|
{
|
|
//const bool isAMD = (vendor_id == 0x1002 || vendor_id == 0x1022);
|
|
|
|
m_features.texture_barrier = GSConfig.OverrideTextureBarriers != 0;
|
|
m_features.multidraw_fb_copy = false;
|
|
m_features.broken_point_sampler = false;
|
|
m_features.primitive_id = true;
|
|
m_features.prefer_new_textures = true;
|
|
m_features.provoking_vertex_last = false;
|
|
m_features.point_expand = false;
|
|
m_features.line_expand = false;
|
|
m_features.framebuffer_fetch = false;
|
|
m_features.stencil_buffer = true;
|
|
m_features.cas_sharpening = true;
|
|
m_features.test_and_sample_depth = true;
|
|
m_features.vs_expand = !GSConfig.DisableVertexShaderExpand;
|
|
|
|
m_features.dxt_textures = SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) &&
|
|
SupportsTextureFormat(DXGI_FORMAT_BC2_UNORM) &&
|
|
SupportsTextureFormat(DXGI_FORMAT_BC3_UNORM);
|
|
m_features.bptc_textures = SupportsTextureFormat(DXGI_FORMAT_BC7_UNORM);
|
|
|
|
m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
|
|
|
|
m_programmable_sample_positions = SupportsProgrammableSamplePositions();
|
|
Console.WriteLnFmt("D3D12: Programmable Sample Position: {}", m_programmable_sample_positions ? "Supported" : "Not Supported");
|
|
|
|
BOOL allow_tearing_supported = false;
|
|
HRESULT hr = m_dxgi_factory->CheckFeatureSupport(
|
|
DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported));
|
|
m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
|
|
|
|
D3D12_FEATURE_DATA_D3D12_OPTIONS12 device_options12 = {};
|
|
hr = m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &device_options12, sizeof(device_options12));
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
Console.WriteLnFmt("D3D12: Enhanced Barriers: {}", device_options12.EnhancedBarriersSupported ? "Supported" : "Not Supported");
|
|
m_enhanced_barriers = device_options12.EnhancedBarriersSupported;
|
|
}
|
|
else
|
|
{
|
|
Console.WriteLnFmt("D3D12: Failed to check for Enhanced Barriers: 0x{:08x}", static_cast<unsigned long>(hr));
|
|
m_enhanced_barriers = false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void GSDevice12::DrawPrimitive()
|
|
{
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
|
GetCommandList().list4->DrawInstanced(m_vertex.count, 1, m_vertex.start, 0);
|
|
}
|
|
|
|
void GSDevice12::DrawIndexedPrimitive()
|
|
{
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
|
GetCommandList().list4->DrawIndexedInstanced(m_index.count, 1, m_index.start, m_vertex.start, 0);
|
|
}
|
|
|
|
void GSDevice12::DrawIndexedPrimitive(int offset, int count)
|
|
{
|
|
pxAssert(offset + count <= (int)m_index.count);
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
|
GetCommandList().list4->DrawIndexedInstanced(count, 1, m_index.start + offset, m_vertex.start, 0);
|
|
}
|
|
|
|
// Translates a GSTexture::Format into the DXGI formats used for the resource itself and for
// its SRV, RTV and DSV. Each output pointer is optional; null pointers are skipped.
void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_format, DXGI_FORMAT* srv_format,
	DXGI_FORMAT* rtv_format, DXGI_FORMAT* dsv_format) const
{
	// Row order follows GSTexture::Format; columns are {resource, SRV, RTV, DSV}.
	static constexpr std::array<std::array<DXGI_FORMAT, 4>, static_cast<int>(GSTexture::Format::Last) + 1>
		s_format_mapping = {{
			{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // Invalid
			{DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
				DXGI_FORMAT_UNKNOWN}, // Color
			{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM,
				DXGI_FORMAT_UNKNOWN}, // ColorHQ
			{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
				DXGI_FORMAT_UNKNOWN}, // ColorHDR
			{DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
				DXGI_FORMAT_UNKNOWN}, // ColorClip
			{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_UNKNOWN,
				DXGI_FORMAT_D32_FLOAT_S8X24_UINT}, // DepthStencil
			{DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_UNKNOWN}, // UNorm8
			{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_UNKNOWN}, // UInt16
			{DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_UNKNOWN}, // UInt32
			// NOTE(review): this row was labelled "Int32" but maps to a float format - confirm the
			// label against the GSTexture::Format enum.
			{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN}, // Int32
			{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // BC1
			{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // BC2
			{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // BC3
			{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // BC7
		}};

	const std::array<DXGI_FORMAT, 4>& row = s_format_mapping[static_cast<int>(format)];

	// Outputs listed in the same order as the table columns.
	DXGI_FORMAT* const outputs[] = {d3d_format, srv_format, rtv_format, dsv_format};
	for (size_t column = 0; column < std::size(outputs); column++)
	{
		if (outputs[column])
			*outputs[column] = row[column];
	}
}
|
|
|
|
// Creates a texture of the given type, size, mip count and format.
// If the first attempt fails, the texture pool is purged, the command list is flushed and
// creation is retried once. Returns an owning raw pointer, or nullptr if both attempts fail.
GSTexture* GSDevice12::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
	DXGI_FORMAT dxgi_format, srv_format, rtv_format, dsv_format;
	LookupNativeFormat(format, &dxgi_format, &srv_format, &rtv_format, &dsv_format);

	// Only read/write textures get a UAV format.
	DXGI_FORMAT uav_format = DXGI_FORMAT_UNKNOWN;
	if (type == GSTexture::Type::RWTexture)
		uav_format = dxgi_format;

	const auto try_create = [&]() {
		return GSTexture12::Create(type, format, width, height, levels,
			dxgi_format, srv_format, rtv_format, dsv_format, uav_format);
	};

	std::unique_ptr<GSTexture12> tex(try_create());
	if (tex)
		return tex.release();

	// We're probably out of vram, try flushing the command buffer to release pending textures.
	PurgePool();
	ExecuteCommandListAndRestartRenderPass(true, "Couldn't allocate texture.");
	tex = try_create();

	return tex.release();
}
|
|
|
|
// Creates a download (readback) texture of the given size and format via GSDownloadTexture12.
std::unique_ptr<GSDownloadTexture> GSDevice12::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
	std::unique_ptr<GSDownloadTexture> download_tex = GSDownloadTexture12::Create(width, height, format);
	return download_tex;
}
|
|
|
|
// Copies rectangle `r` of sTex to (destX, destY) in dTex.
//
// Special cases handled before the actual copy:
//  - an empty rectangle is ignored;
//  - depth sources without programmable sample positions can only be copied whole, so a
//    size mismatch between source and destination aborts the copy;
//  - a source that is still in the Cleared state is forwarded as a clear when the
//    destination is a render target or depth stencil, otherwise its clear is committed first.
void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)
{
	// Empty rect, abort copy.
	if (r.rempty())
	{
		GL_INS("D3D12: CopyRect rect empty.");
		return;
	}

	GSTexture12* const src = static_cast<GSTexture12*>(sTex);
	GSTexture12* const dst = static_cast<GSTexture12*>(dTex);
	const GSVector4i src_bounds(0, 0, src->GetWidth(), src->GetHeight());
	const GSVector4i dst_bounds(0, 0, dst->GetWidth(), dst->GetHeight());
	const bool same_size = src_bounds.eq(dst_bounds);

	// DX12 requires ProgrammableSamplePositions tier 1 to support partial depth copies,
	// otherwise depth can only be copied as a whole resource.
	const bool depth_whole_only = src->IsDepthStencil() && !m_programmable_sample_positions;

	// Sizes must match for full depth copies when no partial copies are supported.
	if (depth_whole_only && !same_size)
	{
		GL_INS("D3D12: CopyRect rect mismatch for full depth copy.");
		return;
	}

	// True when the copy overwrites the entire destination.
	const bool full_draw_copy = depth_whole_only || dst_bounds.eq(r);

	// Source is cleared, if destination is a render target, we can carry the clear forward.
	if (src->GetState() == GSTexture::State::Cleared)
	{
		if (!dst->IsRenderTargetOrDepthStencil())
		{
			// commit the clear to the source first, then do normal copy
			src->CommitClear();
		}
		else
		{
			if (ProcessClearsBeforeCopy(sTex, dTex, full_draw_copy))
				return;

			// Do an attachment clear.
			EndRenderPass();

			dst->SetState(GSTexture::State::Dirty);

			if (dst->GetType() == GSTexture::Type::DepthStencil)
			{
				dst->TransitionToState(GSTexture12::ResourceState::DepthWriteStencil);
				GetCommandList().list4->ClearDepthStencilView(
					dst->GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, src->GetClearDepth(), 0, 0, nullptr);
			}
			else
			{
				dst->TransitionToState(GSTexture12::ResourceState::RenderTarget);
				GetCommandList().list4->ClearRenderTargetView(
					dst->GetWriteDescriptor(), src->GetUNormClearColor().v, 0, nullptr);
			}

			return;
		}
	}

	g_perfmon.Put(GSPerfMon::TextureCopies, 1);

	// if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first
	// (the area outside of where we're copying to)
	if (!full_draw_copy && dst->GetState() == GSTexture::State::Cleared)
		dst->CommitClear();

	EndRenderPass();

	src->TransitionToState(GSTexture12::ResourceState::CopySrc);
	src->SetUseFenceCounter(GetCurrentFenceValue());

	// Unbind the source from texture slot 0 if it is currently bound there.
	if (m_tfx_textures[0] && src->GetSRVDescriptor() == m_tfx_textures[0])
		PSSetShaderResource(0, nullptr, false);

	dst->TransitionToState(GSTexture12::ResourceState::CopyDst);
	dst->SetUseFenceCounter(GetCurrentFenceValue());

	// A whole-resource copy is only possible when the sizes match and either depth forces it
	// or the request covers the full source at the destination origin.
	const bool covers_whole_source = (destX == 0 && destY == 0 && r.eq(src_bounds));
	const bool full_rt_copy = same_size && (depth_whole_only || covers_whole_source);
	if (full_rt_copy)
	{
		GetCommandList().list4->CopyResource(dst->GetResource(), src->GetResource());
	}
	else
	{
		D3D12_TEXTURE_COPY_LOCATION srcloc;
		srcloc.pResource = src->GetResource();
		srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
		srcloc.SubresourceIndex = 0;

		D3D12_TEXTURE_COPY_LOCATION dstloc;
		dstloc.pResource = dst->GetResource();
		dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
		dstloc.SubresourceIndex = 0;

		// Box fields are {left, top, front, right, bottom, back}.
		const D3D12_BOX srcbox{static_cast<UINT>(r.left), static_cast<UINT>(r.top), 0u, static_cast<UINT>(r.right),
			static_cast<UINT>(r.bottom), 1u};
		GetCommandList().list4->CopyTextureRegion(&dstloc, destX, destY, 0, &srcloc, &srcbox);
	}

	dst->SetState(GSTexture::State::Dirty);
}
|
|
|
|
// Picks the pipeline for a stretch/convert draw and forwards to the pipeline-taking overload.
// Shaders with a variable write mask use the colour-copy pipelines indexed by mask; all others
// use the convert pipeline when a destination texture is given and the present pipeline otherwise.
void GSDevice12::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
	GSHWDrawConfig::ColorMaskSelector cms, ShaderConvert shader, bool linear)
{
	// Discarding is only allowed when all four channels are written.
	const bool allow_discard = (cms.wrgba == 0xf);

	const int shader_index = static_cast<int>(shader);
	const ID3D12PipelineState* const pipeline =
		HasVariableWriteMask(shader) ?
			m_color_copy[GetShaderIndexForMask(shader, cms.wrgba)].get() :
			(dTex ? m_convert[shader_index].get() : m_present[shader_index].get());

	DoStretchRect(static_cast<GSTexture12*>(sTex), sRect, static_cast<GSTexture12*>(dTex), dRect,
		pipeline, linear, allow_discard);
}
|
|
|
|
// Draws sTex into dTex (or the window when dTex is null) with the selected present shader.
// The display constant buffer is filled with the source/target rectangles and the shader time.
void GSDevice12::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
	PresentShader shader, float shaderTime, bool linear)
{
	// Without a destination texture the target size is the window size.
	const GSVector2i target_size = dTex ? dTex->GetSize() : GSVector2i(GetWindowWidth(), GetWindowHeight());

	DisplayConstantBuffer cb;
	cb.SetSource(sRect, sTex->GetSize());
	cb.SetTarget(dRect, target_size);
	cb.SetTime(shaderTime);

	SetUtilityRootSignature();
	SetUtilityPushConstants(&cb, sizeof(cb));

	const ID3D12PipelineState* const pipeline = m_present[static_cast<int>(shader)].get();
	DoStretchRect(static_cast<GSTexture12*>(sTex), sRect, static_cast<GSTexture12*>(dTex), dRect,
		pipeline, linear, true);
}
|
|
|
|
// Runs the CLUT conversion shader from sTex into a dSize x 1 strip of dTex.
// dSize == 16 selects the CLUT_4 shader, anything else the CLUT_8 shader.
void GSDevice12::UpdateCLUTTexture(
	GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// match merge cb
	struct Uniforms
	{
		float scale;
		float pad1[3];
		u32 offsetX, offsetY, dOffset;
		u32 pad2;
	};

	// Padding members are left value-initialised.
	const Uniforms cb = {
		sScale,
		{},
		offsetX,
		offsetY,
		dOffset,
	};
	SetUtilityRootSignature();
	SetUtilityPushConstants(&cb, sizeof(cb));

	const GSVector4 dRect(0, 0, dSize, 1);
	const ShaderConvert shader = (dSize == 16) ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;
	const ID3D12PipelineState* const pipeline = m_convert[static_cast<int>(shader)].get();
	DoStretchRect(static_cast<GSTexture12*>(sTex), GSVector4::zero(), static_cast<GSTexture12*>(dTex), dRect,
		pipeline, false, true);
}
|
|
|
|
// Converts sTex into an indexed texture covering all of dTex.
// The shader is chosen from SPSM: RGBA_TO_8I when (SPSM & 0xE) is zero, RGB5A1_TO_8I otherwise.
// offsetX, offsetY and DPSM are not used by this implementation.
void GSDevice12::ConvertToIndexedTexture(
	GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	// match merge cb
	struct Uniforms
	{
		float scale;
		float pad1[3];
		u32 SBW, DBW, SPSM;
	};

	const Uniforms cb = {
		sScale,
		{},
		SBW,
		DBW,
		SPSM,
	};
	SetUtilityRootSignature();
	SetUtilityPushConstants(&cb, sizeof(cb));

	// The draw covers the whole destination texture.
	const GSVector4 dRect(0, 0, dTex->GetWidth(), dTex->GetHeight());
	const bool source_is_rgba = ((SPSM & 0xE) == 0);
	const ShaderConvert shader = source_is_rgba ? ShaderConvert::RGBA_TO_8I : ShaderConvert::RGB5A1_TO_8I;
	const ID3D12PipelineState* const pipeline = m_convert[static_cast<int>(shader)].get();
	DoStretchRect(static_cast<GSTexture12*>(sTex), GSVector4::zero(), static_cast<GSTexture12*>(dTex), dRect,
		pipeline, false, true);
}
|
|
|
|
// Downsamples sTex into dRect of dTex with the DOWNSAMPLE_COPY shader.
// The shader receives the squared downsample factor as its weight, a step multiplier that
// depends on the native-scaling setting, the clamp minimum and the downsample factor itself.
void GSDevice12::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min, const GSVector4& dRect)
{
	struct Uniforms
	{
		float weight;
		float step_multiplier;
		float pad0[2];
		GSVector2i clamp_min;
		int downsample_factor;
		int pad1;
	};

	const float weight = static_cast<float>(downsample_factor * downsample_factor);
	// Native-scaling settings above Aggressive double the step.
	const float step_multiplier = (GSConfig.UserHacks_NativeScaling > GSNativeScaling::Aggressive) ? 2.0f : 1.0f;

	const Uniforms cb = {
		weight,
		step_multiplier,
		{},
		clamp_min,
		static_cast<int>(downsample_factor),
		0,
	};
	SetUtilityRootSignature();
	SetUtilityPushConstants(&cb, sizeof(cb));

	//const GSVector4 dRect = GSVector4(dTex->GetRect());
	const ShaderConvert shader = ShaderConvert::DOWNSAMPLE_COPY;
	const ID3D12PipelineState* const pipeline = m_convert[static_cast<int>(shader)].get();
	DoStretchRect(static_cast<GSTexture12*>(sTex), GSVector4::zero(), static_cast<GSTexture12*>(dTex), dRect,
		pipeline, false, true);
}
|
|
|
|
void GSDevice12::DrawMultiStretchRects(
|
|
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
|
|
{
|
|
GSTexture* last_tex = rects[0].src;
|
|
bool last_linear = rects[0].linear;
|
|
u8 last_wmask = rects[0].wmask.wrgba;
|
|
|
|
u32 first = 0;
|
|
u32 count = 1;
|
|
|
|
// Make sure all textures are in shader read only layout, so we don't need to break
|
|
// the render pass to transition.
|
|
for (u32 i = 0; i < num_rects; i++)
|
|
{
|
|
GSTexture12* const stex = static_cast<GSTexture12*>(rects[i].src);
|
|
stex->CommitClear();
|
|
if (stex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
|
|
{
|
|
EndRenderPass();
|
|
stex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
}
|
|
}
|
|
|
|
for (u32 i = 1; i < num_rects; i++)
|
|
{
|
|
if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
|
|
{
|
|
count++;
|
|
continue;
|
|
}
|
|
|
|
DoMultiStretchRects(rects + first, count, static_cast<GSTexture12*>(dTex), shader);
|
|
last_tex = rects[i].src;
|
|
last_linear = rects[i].linear;
|
|
last_wmask = rects[i].wmask.wrgba;
|
|
first += count;
|
|
count = 1;
|
|
}
|
|
|
|
DoMultiStretchRects(rects + first, count, static_cast<GSTexture12*>(dTex), shader);
|
|
}
|
|
|
|
void GSDevice12::DoMultiStretchRects(
|
|
const MultiStretchRect* rects, u32 num_rects, GSTexture12* dTex, ShaderConvert shader)
|
|
{
|
|
// Set up vertices first.
|
|
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
|
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
|
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
|
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
|
{
|
|
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
|
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
|
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
|
{
|
|
pxFailRel("Failed to reserve space for vertices");
|
|
}
|
|
}
|
|
|
|
// Pain in the arse because the primitive topology for the pipelines is all triangle strips.
|
|
// Don't use primitive restart here, it ends up slower on some drivers.
|
|
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
|
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
|
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
|
|
u32 icount = 0;
|
|
u32 vcount = 0;
|
|
for (u32 i = 0; i < num_rects; i++)
|
|
{
|
|
const GSVector4& sRect = rects[i].src_rect;
|
|
const GSVector4& dRect = rects[i].dst_rect;
|
|
|
|
const float left = dRect.x * 2 / ds.x - 1.0f;
|
|
const float top = 1.0f - dRect.y * 2 / ds.y;
|
|
const float right = dRect.z * 2 / ds.x - 1.0f;
|
|
const float bottom = 1.0f - dRect.w * 2 / ds.y;
|
|
|
|
const u32 vstart = vcount;
|
|
verts[vcount++] = {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)};
|
|
verts[vcount++] = {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)};
|
|
verts[vcount++] = {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)};
|
|
verts[vcount++] = {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)};
|
|
|
|
if (i > 0)
|
|
idx[icount++] = vstart;
|
|
|
|
idx[icount++] = vstart;
|
|
idx[icount++] = vstart + 1;
|
|
idx[icount++] = vstart + 2;
|
|
idx[icount++] = vstart + 3;
|
|
idx[icount++] = vstart + 3;
|
|
};
|
|
|
|
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
|
m_vertex.count = vcount;
|
|
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
|
m_index.count = icount;
|
|
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
|
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
|
|
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(GSVertexPT1));
|
|
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
|
|
|
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
|
const GSVector4i rc(dTex->GetRect());
|
|
OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr, rc);
|
|
if (!InRenderPass())
|
|
BeginRenderPassForStretchRect(dTex, rc, rc, false);
|
|
SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
|
SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler_cpu : m_point_sampler_cpu);
|
|
|
|
pxAssert(HasVariableWriteMask(shader) || rects[0].wmask.wrgba == 0xf);
|
|
SetPipeline((rects[0].wmask.wrgba != 0xf) ?
|
|
m_color_copy[GetShaderIndexForMask(shader, rects[0].wmask.wrgba)].get() :
|
|
m_convert[static_cast<int>(shader)].get());
|
|
|
|
if (ApplyUtilityState())
|
|
DrawIndexedPrimitive();
|
|
}
|
|
|
|
// Starts a render pass targeting dTex for a stretch-rect style draw.
// The contents are discarded only when the caller allows it and the draw covers
// the whole texture (dst_rc equals dtex_rc); otherwise the load op is derived
// from the texture's current state.
void GSDevice12::BeginRenderPassForStretchRect(
	GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc, bool allow_discard)
{
	D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE load_op;
	if (allow_discard && dst_rc.eq(dtex_rc))
		load_op = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
	else
		load_op = GetLoadOpForTexture(dTex);

	// Whatever the previous state was, the texture holds rendered data from here on.
	dTex->SetState(GSTexture::State::Dirty);

	if (dTex->GetType() == GSTexture::Type::DepthStencil)
	{
		// Depth target: the load op goes in the depth slot, colour and stencil are not accessed.
		BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
			D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, load_op, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
			D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
			GSVector4::zero(), dTex->GetClearDepth());
		return;
	}

	// Colour target: the load op goes in the colour slot, depth and stencil are not accessed.
	BeginRenderPass(load_op, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
		dTex->GetUNormClearColor());
}
|
|
|
|
// Draws sRect of sTex into dRect of dTex with the given pipeline.
// A null dTex means the draw goes to whatever is currently bound for presentation.
void GSDevice12::DoStretchRect(GSTexture12* sTex, const GSVector4& sRect, GSTexture12* dTex, const GSVector4& dRect,
	const ID3D12PipelineState* pipeline, bool linear, bool allow_discard)
{
	// The source has to be readable by the pixel shader before it is bound.
	if (sTex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
	{
		// can't transition in a render pass
		EndRenderPass();
		sTex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
	}

	SetUtilityRootSignature();
	SetUtilityTexture(sTex, linear ? m_linear_sampler_cpu : m_point_sampler_cpu);
	SetPipeline(pipeline);

	const bool is_present = (dTex == nullptr);
	const bool depth = (!is_present && dTex->GetType() == GSTexture::Type::DepthStencil);
	const GSVector2i size(is_present ? GSVector2i(GetWindowWidth(), GetWindowHeight()) : dTex->GetSize());
	const GSVector4i dtex_rc(0, 0, size.x, size.y);
	const GSVector4i dst_rc(GSVector4i(dRect).rintersect(dtex_rc));

	if (is_present)
	{
		// this is for presenting, we don't want to screw with the viewport/scissor set by display
		m_dirty_flags &= ~(DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
	}
	else
	{
		// switch rts (which might not end the render pass), so check the bounds
		OMSetRenderTargets(depth ? nullptr : dTex, depth ? dTex : nullptr, dst_rc);
	}

	// When presenting, or when a pass is still open, we draw into the current render target as-is.
	if (!is_present && !InRenderPass())
		BeginRenderPassForStretchRect(dTex, dtex_rc, dst_rc, allow_discard);

	DrawStretchRect(sRect, dRect, size);
}
|
|
|
|
void GSDevice12::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds)
|
|
{
|
|
// ia
|
|
const float left = dRect.x * 2 / ds.x - 1.0f;
|
|
const float top = 1.0f - dRect.y * 2 / ds.y;
|
|
const float right = dRect.z * 2 / ds.x - 1.0f;
|
|
const float bottom = 1.0f - dRect.w * 2 / ds.y;
|
|
|
|
GSVertexPT1 vertices[] = {
|
|
{GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)},
|
|
{GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)},
|
|
{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)},
|
|
{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)},
|
|
};
|
|
IASetVertexBuffer(vertices, sizeof(vertices[0]), std::size(vertices));
|
|
SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
|
|
|
if (ApplyUtilityState())
|
|
DrawPrimitive();
|
|
}
|
|
|
|
// Merges the two source textures (sTex[0], sTex[1]) into dTex, optionally writing
// a feedback copy to sTex[2]. c is the background colour (packed 8-bit per channel).
void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect,
	const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c, const bool linear)
{
	GL_PUSH("DoMerge");

	GSTexture12* const dTex12 = static_cast<GSTexture12*>(dTex);
	const GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f);
	const bool feedback_write_2 = PMODE.EN2 && sTex[2] != nullptr && EXTBUF.FBIN == 1;
	const bool feedback_write_1 = PMODE.EN1 && sTex[2] != nullptr && EXTBUF.FBIN == 0;
	const bool feedback_write_2_but_blend_bg = feedback_write_2 && PMODE.SLBG == 1;
	const bool second_output_selected = (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg);
	const D3D12DescriptorHandle& sampler = linear ? m_linear_sampler_cpu : m_point_sampler_cpu;

	// Merge the 2 source textures (sTex[0],sTex[1]). Final results go to dTex. Feedback write will go to sTex[2].
	// If either 2nd output is disabled or SLBG is 1, a background color will be used.
	// Note: background color is also used when outside of the unit rectangle area
	EndRenderPass();

	// An input is used when it is dirty, cleared, or carries a non-zero clear colour.
	const auto has_content = [](GSTexture* tex) {
		return tex->GetState() == GSTexture::State::Dirty || tex->GetState() == GSTexture::State::Cleared ||
			   tex->GetClearColor() != 0;
	};
	const bool has_input_0 = sTex[0] && has_content(sTex[0]);
	const bool has_input_1 = second_output_selected && sTex[1] && has_content(sTex[1]);

	// transition everything before starting the new render pass
	if (has_input_0)
	{
		GSTexture12* const in0 = static_cast<GSTexture12*>(sTex[0]);
		in0->CommitClear();
		in0->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
	}
	if (has_input_1)
	{
		GSTexture12* const in1 = static_cast<GSTexture12*>(sTex[1]);
		in1->CommitClear();
		in1->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
	}
	dTex12->TransitionToState(GSTexture12::ResourceState::RenderTarget);

	// Upload constant to select YUV algo, but skip constant buffer update if we don't need it
	if (feedback_write_2 || feedback_write_1 || sTex[0])
	{
		SetUtilityRootSignature();
		const MergeConstantBuffer uniforms = {GSVector4::unorm8(c), EXTBUF.EMODA, EXTBUF.EMODC};
		SetUtilityPushConstants(&uniforms, sizeof(uniforms));
	}

	const GSVector2i dsize(dTex->GetSize());
	const GSVector4i darea(0, 0, dsize.x, dsize.y);
	bool dcleared = false;
	if (has_input_1 && second_output_selected)
	{
		// 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output
		// Note: value outside of dRect must contains the background color (c)
		OMSetRenderTargets(dTex, nullptr, darea);
		SetUtilityTexture(sTex[1], sampler);
		BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR,
			D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
			D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
			D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, GSVector4::unorm8(c));
		SetUtilityRootSignature();
		SetPipeline(m_convert[static_cast<int>(ShaderConvert::COPY)].get());
		DrawStretchRect(sRect[1], PMODE.SLBG ? dRect[2] : dRect[1], dsize);
		dTex->SetState(GSTexture::State::Dirty);
		dcleared = true;
	}

	// Feedback target dimensions (zero-sized when there is no feedback texture).
	const GSVector2i fbsize(sTex[2] ? sTex[2]->GetSize() : GSVector2i(0, 0));
	const GSVector4i fbarea(0, 0, fbsize.x, fbsize.y);
	if (feedback_write_2) // FIXME I'm not sure dRect[1] is always correct
	{
		EndRenderPass();
		OMSetRenderTargets(sTex[2], nullptr, fbarea);
		if (dcleared)
			SetUtilityTexture(dTex, sampler);

		// sTex[2] can be sTex[0], in which case it might be cleared (e.g. Xenosaga).
		BeginRenderPassForStretchRect(static_cast<GSTexture12*>(sTex[2]), fbarea, GSVector4i(dRect[2]));
		if (dcleared)
		{
			SetUtilityRootSignature();
			SetPipeline(m_convert[static_cast<int>(ShaderConvert::YUV)].get());
			DrawStretchRect(full_r, dRect[2], fbsize);
		}
		EndRenderPass();

		if (sTex[0] == sTex[2])
		{
			// need a barrier here because of the render pass
			static_cast<GSTexture12*>(sTex[2])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
		}
	}

	// Restore background color to process the normal merge
	if (feedback_write_2_but_blend_bg || !dcleared)
	{
		EndRenderPass();
		OMSetRenderTargets(dTex, nullptr, darea);
		BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
			D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
			D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
			GSVector4::unorm8(c));
		dTex->SetState(GSTexture::State::Dirty);
	}
	else if (!InRenderPass())
	{
		// The destination already holds the 2nd output; reopen a pass that keeps it.
		OMSetRenderTargets(dTex, nullptr, darea);
		BeginRenderPass(
			D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE);
	}

	if (has_input_0)
	{
		// 1st output is enabled. It must be blended
		SetUtilityRootSignature();
		SetUtilityTexture(sTex[0], sampler);
		SetPipeline(m_merge[PMODE.MMOD].get());
		DrawStretchRect(sRect[0], dRect[0], dTex->GetSize());
	}

	if (feedback_write_1) // FIXME I'm not sure dRect[0] is always correct
	{
		EndRenderPass();
		SetUtilityRootSignature();
		SetPipeline(m_convert[static_cast<int>(ShaderConvert::YUV)].get());
		SetUtilityTexture(dTex, sampler);
		OMSetRenderTargets(sTex[2], nullptr, fbarea);
		BeginRenderPass(
			D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE);
		// NOTE(review): this passes dsize while the target is sTex[2]; the feedback_write_2
		// path above passes fbsize for the same kind of draw -- confirm which is intended.
		DrawStretchRect(full_r, dRect[2], dsize);
	}

	EndRenderPass();

	// this texture is going to get used as an input, so make sure we don't read undefined data
	dTex12->CommitClear();
	dTex12->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
|
|
|
|
// Runs the selected interlace shader, reading sRect of sTex and writing dRect of dTex.
// dTex is left in the pixel-shader-resource state.
void GSDevice12::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
	ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb)
{
	GSTexture12* const target = static_cast<GSTexture12*>(dTex);
	target->TransitionToState(GSTexture12::ResourceState::RenderTarget);

	// Clamp the destination rectangle to the bounds of the target texture.
	const GSVector4i requested_rc = GSVector4i(dRect);
	const GSVector4i target_rc = dTex->GetRect();
	const GSVector4i clamped_rc = requested_rc.rintersect(target_rc);

	EndRenderPass();
	OMSetRenderTargets(dTex, nullptr, clamped_rc);
	SetUtilityRootSignature();
	SetUtilityTexture(sTex, linear ? m_linear_sampler_cpu : m_point_sampler_cpu);
	BeginRenderPassForStretchRect(target, dTex->GetRect(), clamped_rc, false);
	SetPipeline(m_interlace[static_cast<int>(shader)].get());
	SetUtilityPushConstants(&cb, sizeof(cb));
	DrawStretchRect(sRect, dRect, dTex->GetSize());
	EndRenderPass();

	// this texture is going to get used as an input, so make sure we don't read undefined data
	target->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
|
|
|
|
// Applies the shade-boost pipeline: reads all of sTex and overwrites all of dTex.
// params supplies four floats that are uploaded as push constants.
void GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4])
{
	const GSVector4 src_uv = GSVector4(0.0f, 0.0f, 1.0f, 1.0f);
	const GSVector4i dst_rc = dTex->GetRect();

	EndRenderPass();
	OMSetRenderTargets(dTex, nullptr, dst_rc);
	SetUtilityRootSignature();
	SetUtilityTexture(sTex, m_point_sampler_cpu);

	// The whole target is rewritten, so its previous contents are discarded.
	BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS);
	dTex->SetState(GSTexture::State::Dirty);

	SetPipeline(m_shadeboost_pipeline.get());
	SetUtilityPushConstants(params, sizeof(float) * 4);
	DrawStretchRect(src_uv, GSVector4(dst_rc), dTex->GetSize());
	EndRenderPass();

	// Leave the result ready to be sampled.
	static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
|
|
|
|
// Applies the FXAA pipeline: reads all of sTex (linear sampling) and overwrites all of dTex.
void GSDevice12::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{
	const GSVector4 src_uv = GSVector4(0.0f, 0.0f, 1.0f, 1.0f);
	const GSVector4i dst_rc = dTex->GetRect();

	EndRenderPass();
	OMSetRenderTargets(dTex, nullptr, dst_rc);
	SetUtilityRootSignature();
	SetUtilityTexture(sTex, m_linear_sampler_cpu);

	// The whole target is rewritten, so its previous contents are discarded.
	BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS);
	dTex->SetState(GSTexture::State::Dirty);

	SetPipeline(m_fxaa_pipeline.get());
	DrawStretchRect(src_uv, GSVector4(dst_rc), dTex->GetSize());
	EndRenderPass();

	// Leave the result ready to be sampled.
	static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
|
|
|
|
bool GSDevice12::CompileCASPipelines()
|
|
{
|
|
D3D12::RootSignatureBuilder rsb;
|
|
rsb.Add32BitConstants(0, NUM_CAS_CONSTANTS, D3D12_SHADER_VISIBILITY_ALL);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_ALL);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, 1, D3D12_SHADER_VISIBILITY_ALL);
|
|
m_cas_root_signature = rsb.Create(false);
|
|
if (!m_cas_root_signature)
|
|
return false;
|
|
|
|
std::optional<std::string> cas_source = ReadShaderSource("shaders/dx11/cas.hlsl");
|
|
if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value()))
|
|
return false;
|
|
|
|
static constexpr D3D_SHADER_MACRO sharpen_only_macros[] = {{"CAS_SHARPEN_ONLY", "1"}, {nullptr, nullptr}};
|
|
|
|
const ComPtr<ID3DBlob> cs_upscale(m_shader_cache.GetComputeShader(cas_source.value(), nullptr, "main"));
|
|
const ComPtr<ID3DBlob> cs_sharpen(m_shader_cache.GetComputeShader(cas_source.value(), sharpen_only_macros, "main"));
|
|
if (!cs_upscale || !cs_sharpen)
|
|
return false;
|
|
|
|
D3D12::ComputePipelineBuilder cpb;
|
|
cpb.SetRootSignature(m_cas_root_signature.get());
|
|
cpb.SetShader(cs_upscale->GetBufferPointer(), cs_upscale->GetBufferSize());
|
|
m_cas_upscale_pipeline = cpb.Create(m_device.get(), m_shader_cache, false);
|
|
cpb.SetShader(cs_sharpen->GetBufferPointer(), cs_sharpen->GetBufferSize());
|
|
m_cas_sharpen_pipeline = cpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_cas_upscale_pipeline || !m_cas_sharpen_pipeline)
|
|
{
|
|
Console.Error("D3D12: Failed to create CAS pipelines");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CompileImGuiPipeline()
|
|
{
|
|
const std::optional<std::string> hlsl = ReadShaderSource("shaders/dx11/imgui.fx");
|
|
if (!hlsl.has_value())
|
|
{
|
|
Console.Error("D3D12: Failed to read imgui.fx");
|
|
return false;
|
|
}
|
|
|
|
const ComPtr<ID3DBlob> vs = m_shader_cache.GetVertexShader(hlsl.value(), nullptr, "vs_main");
|
|
const ComPtr<ID3DBlob> ps = m_shader_cache.GetPixelShader(hlsl.value(), nullptr, "ps_main");
|
|
if (!vs || !ps)
|
|
{
|
|
Console.Error("D3D12: Failed to compile ImGui shaders");
|
|
return false;
|
|
}
|
|
|
|
D3D12::GraphicsPipelineBuilder gpb;
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, offsetof(ImDrawVert, pos));
|
|
gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, offsetof(ImDrawVert, uv));
|
|
gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(ImDrawVert, col));
|
|
gpb.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
|
|
gpb.SetVertexShader(vs.get());
|
|
gpb.SetPixelShader(ps.get());
|
|
gpb.SetNoCullRasterizationState();
|
|
gpb.SetNoDepthTestState();
|
|
gpb.SetBlendState(0, true, D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE,
|
|
D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD);
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
|
|
m_imgui_pipeline = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_imgui_pipeline)
|
|
{
|
|
Console.Error("D3D12: Failed to compile ImGui pipeline");
|
|
return false;
|
|
}
|
|
|
|
D3D12::SetObjectName(m_imgui_pipeline.get(), "ImGui pipeline");
|
|
return true;
|
|
}
|
|
|
|
void GSDevice12::RenderImGui()
|
|
{
|
|
ImGui::Render();
|
|
const ImDrawData* draw_data = ImGui::GetDrawData();
|
|
if (draw_data->CmdListsCount == 0)
|
|
return;
|
|
|
|
UpdateImGuiTextures();
|
|
|
|
const float L = 0.0f;
|
|
const float R = static_cast<float>(m_window_info.surface_width);
|
|
const float T = 0.0f;
|
|
const float B = static_cast<float>(m_window_info.surface_height);
|
|
|
|
// clang-format off
|
|
const float ortho_projection[4][4] =
|
|
{
|
|
{ 2.0f/(R-L), 0.0f, 0.0f, 0.0f },
|
|
{ 0.0f, 2.0f/(T-B), 0.0f, 0.0f },
|
|
{ 0.0f, 0.0f, 0.5f, 0.0f },
|
|
{ (R+L)/(L-R), (T+B)/(B-T), 0.5f, 1.0f },
|
|
};
|
|
// clang-format on
|
|
|
|
SetUtilityRootSignature();
|
|
SetUtilityPushConstants(ortho_projection, sizeof(ortho_projection));
|
|
SetPipeline(m_imgui_pipeline.get());
|
|
SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
|
|
if (m_utility_sampler_cpu != m_linear_sampler_cpu)
|
|
{
|
|
m_utility_sampler_cpu = m_linear_sampler_cpu;
|
|
m_dirty_flags |= DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE;
|
|
|
|
// just skip if we run out.. we can't resume the present render pass :/
|
|
if (!GetSamplerAllocator().LookupSingle(&m_utility_sampler_gpu, m_linear_sampler_cpu))
|
|
{
|
|
Console.Warning("D3D12: Skipping ImGui draw because of no descriptors");
|
|
return;
|
|
}
|
|
}
|
|
|
|
// this is for presenting, we don't want to screw with the viewport/scissor set by display
|
|
m_dirty_flags &= ~(DIRTY_FLAG_RENDER_TARGET | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
|
|
|
|
for (int n = 0; n < draw_data->CmdListsCount; n++)
|
|
{
|
|
const ImDrawList* cmd_list = draw_data->CmdLists[n];
|
|
|
|
u32 vertex_offset;
|
|
{
|
|
const u32 size = sizeof(ImDrawVert) * static_cast<u32>(cmd_list->VtxBuffer.Size);
|
|
if (!m_vertex_stream_buffer.ReserveMemory(size, sizeof(ImDrawVert)))
|
|
{
|
|
Console.Warning("D3D12: Skipping ImGui draw because of no vertex buffer space");
|
|
return;
|
|
}
|
|
|
|
vertex_offset = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(ImDrawVert);
|
|
std::memcpy(m_vertex_stream_buffer.GetCurrentHostPointer(), cmd_list->VtxBuffer.Data, size);
|
|
m_vertex_stream_buffer.CommitMemory(size);
|
|
}
|
|
|
|
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(ImDrawVert));
|
|
|
|
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
|
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
|
|
|
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
|
{
|
|
const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
|
|
pxAssert(!pcmd->UserCallback);
|
|
|
|
const GSVector4 clip = GSVector4::load<false>(&pcmd->ClipRect);
|
|
if ((clip.zwzw() <= clip.xyxy()).mask() != 0)
|
|
continue;
|
|
|
|
SetScissor(GSVector4i(clip));
|
|
|
|
GSTexture12* tex = reinterpret_cast<GSTexture12*>(pcmd->GetTexID());
|
|
D3D12DescriptorHandle handle = m_null_texture->GetSRVDescriptor();
|
|
if (tex)
|
|
{
|
|
tex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
handle = tex->GetSRVDescriptor();
|
|
}
|
|
|
|
if (m_utility_texture_cpu != handle)
|
|
{
|
|
m_utility_texture_cpu = handle;
|
|
m_dirty_flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE;
|
|
|
|
if (!GetTextureGroupDescriptors(&m_utility_texture_gpu, &handle, 1))
|
|
{
|
|
Console.Warning("D3D12: Skipping ImGui draw because of no descriptors");
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (ApplyUtilityState())
|
|
{
|
|
GetCommandList().list4->DrawIndexedInstanced(
|
|
pcmd->ElemCount, 1, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
|
}
|
|
}
|
|
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
|
}
|
|
}
|
|
|
|
// Dispatches the CAS compute shader (sharpen-only or upscale variant) from sTex into dTex.
// Returns false if descriptors cannot be allocated even after flushing the command list.
bool GSDevice12::DoCAS(
	GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
{
	EndRenderPass();

	GSTexture12* const sTex12 = static_cast<GSTexture12*>(sTex);
	GSTexture12* const dTex12 = static_cast<GSTexture12*>(dTex);

	// One SRV for the source and one UAV for the destination.
	D3D12DescriptorHandle sTexDH, dTexDH;
	const auto allocate_descriptors = [&]() {
		return GetTextureGroupDescriptors(&sTexDH, &sTex12->GetSRVDescriptor(), 1) &&
			   GetTextureGroupDescriptors(&dTexDH, &dTex12->GetUAVDescriptor(), 1);
	};
	if (!allocate_descriptors())
	{
		// Flush to free up descriptor space, then try exactly once more.
		ExecuteCommandList(false, "Ran out of descriptors for CAS");
		if (!allocate_descriptors())
		{
			Console.Error("D3D12: Failed to allocate CAS descriptors.");
			return false;
		}
	}

	const D3D12CommandList& cmdlist = GetCommandList();

	// Remember the source's state so it can be put back after the dispatch.
	const GSTexture12::ResourceState previous_src_state = sTex12->GetResourceState();
	sTex12->TransitionToState(cmdlist, GSTexture12::ResourceState::ComputeShaderResource);
	dTex12->TransitionToState(cmdlist, GSTexture12::ResourceState::CASShaderUAV);

	cmdlist.list4->SetComputeRootSignature(m_cas_root_signature.get());
	cmdlist.list4->SetComputeRoot32BitConstants(
		CAS_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, NUM_CAS_CONSTANTS, constants.data(), 0);
	cmdlist.list4->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE, sTexDH);
	cmdlist.list4->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE, dTexDH);
	cmdlist.list4->SetPipelineState(sharpen_only ? m_cas_sharpen_pipeline.get() : m_cas_upscale_pipeline.get());

	// The pipeline was set behind the state tracker's back, so force a rebind later.
	m_dirty_flags |= DIRTY_FLAG_PIPELINE;

	// One thread group per 16x16 region of the destination, rounded up.
	static const int threadGroupWorkRegionDim = 16;
	const int groups_x = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
	const int groups_y = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
	cmdlist.list4->Dispatch(groups_x, groups_y, 1);

	sTex12->TransitionToState(cmdlist, previous_src_state);
	return true;
}
|
|
|
|
void GSDevice12::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
|
{
|
|
const u32 size = static_cast<u32>(stride) * static_cast<u32>(count);
|
|
if (!m_vertex_stream_buffer.ReserveMemory(size, static_cast<u32>(stride)))
|
|
{
|
|
ExecuteCommandListAndRestartRenderPass(false, "Uploading to vertex buffer");
|
|
if (!m_vertex_stream_buffer.ReserveMemory(size, static_cast<u32>(stride)))
|
|
pxFailRel("Failed to reserve space for vertices");
|
|
}
|
|
|
|
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / stride;
|
|
m_vertex.count = count;
|
|
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), stride);
|
|
|
|
GSVector4i::storent(m_vertex_stream_buffer.GetCurrentHostPointer(), vertex, count * stride);
|
|
m_vertex_stream_buffer.CommitMemory(size);
|
|
}
|
|
|
|
void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
|
|
{
|
|
const u32 size = sizeof(u16) * static_cast<u32>(count);
|
|
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
|
{
|
|
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
|
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
|
pxFailRel("Failed to reserve space for vertices");
|
|
}
|
|
|
|
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
|
m_index.count = count;
|
|
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
|
|
|
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
|
m_index_stream_buffer.CommitMemory(size);
|
|
}
|
|
|
|
// Selects the colour and/or depth target for subsequent draws, ending the current
// render pass when the targets (or a pending clear) require it, and sets the
// viewport to the full target plus the given scissor.
void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool depth_read)
{
	GSTexture12* const color_target = static_cast<GSTexture12*>(rt);
	GSTexture12* const depth_target = static_cast<GSTexture12*>(ds);
	pxAssert(color_target || depth_target);

	const bool targets_changed = (m_current_render_target != color_target ||
								  m_current_depth_target != depth_target || m_current_depth_read_only != depth_read);
	if (targets_changed)
	{
		// framebuffer change
		EndRenderPass();
	}
	else if (InRenderPass())
	{
		// Framebuffer unchanged, but check for clears. Have to restart render pass, unlike Vulkan.
		// We'll take care of issuing the actual clear there, because we have to start one anyway.
		const auto handle_pending_clear = [this](GSTexture12* tex) {
			if (!tex || tex->GetState() == GSTexture::State::Dirty)
				return;

			if (tex->GetState() == GSTexture::State::Cleared)
				EndRenderPass();
			else
				tex->SetState(GSTexture::State::Dirty);
		};
		handle_pending_clear(color_target);
		handle_pending_clear(depth_target);
	}

	m_current_render_target = color_target;
	m_current_depth_target = depth_target;
	m_current_depth_read_only = depth_read;

	// Resource transitions are only issued while no render pass is open.
	if (!InRenderPass())
	{
		if (color_target)
			color_target->TransitionToState(GSTexture12::ResourceState::RenderTarget);
		if (depth_target)
		{
			depth_target->TransitionToState(depth_read ? GSTexture12::ResourceState::DepthReadStencil :
														 GSTexture12::ResourceState::DepthWriteStencil);
		}
	}

	// This is used to set/initialize the framebuffer for tfx rendering.
	const GSVector2i size = color_target ? color_target->GetSize() : depth_target->GetSize();
	const D3D12_VIEWPORT vp{0.0f, 0.0f, static_cast<float>(size.x), static_cast<float>(size.y), 0.0f, 1.0f};

	SetViewport(vp);
	SetScissor(scissor);
}
|
|
|
|
// Returns (through cpu_handle) the sampler descriptor for the given selector,
// creating and caching it on first use. Returns false if no descriptor could be allocated.
bool GSDevice12::GetSampler(D3D12DescriptorHandle* cpu_handle, GSHWDrawConfig::SamplerSelector ss)
{
	// Fast path: a sampler for this selector key already exists.
	if (const auto cached = m_samplers.find(ss.key); cached != m_samplers.end())
	{
		*cpu_handle = cached->second;
		return true;
	}

	D3D12_SAMPLER_DESC sd = {};
	const int max_aniso = GSConfig.MaxAnisotropy;
	if (max_aniso > 1 && ss.aniso)
	{
		sd.Filter = D3D12_FILTER_ANISOTROPIC;
	}
	else
	{
		// Indexed by a 3-bit value: bit 2 = mip linear, bit 1 = mag linear, bit 0 = min linear.
		static constexpr std::array<D3D12_FILTER, 8> filters = {{
			D3D12_FILTER_MIN_MAG_MIP_POINT, // 000 / min=point,mag=point,mip=point
			D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT, // 001 / min=linear,mag=point,mip=point
			D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT, // 010 / min=point,mag=linear,mip=point
			D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, // 011 / min=linear,mag=linear,mip=point
			D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR, // 100 / min=point,mag=point,mip=linear
			D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, // 101 / min=linear,mag=point,mip=linear
			D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR, // 110 / min=point,mag=linear,mip=linear
			D3D12_FILTER_MIN_MAG_MIP_LINEAR, // 111 / min=linear,mag=linear,mip=linear
		}};

		const u8 filter_index = (static_cast<u8>(ss.IsMipFilterLinear()) << 2) |
								(static_cast<u8>(ss.IsMagFilterLinear()) << 1) | static_cast<u8>(ss.IsMinFilterLinear());
		sd.Filter = filters[filter_index];
	}

	// U/V wrap or clamp per selector; W always clamps.
	sd.AddressU = ss.tau ? D3D12_TEXTURE_ADDRESS_MODE_WRAP : D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
	sd.AddressV = ss.tav ? D3D12_TEXTURE_ADDRESS_MODE_WRAP : D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
	sd.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;

	// LOD is capped at 0.25 when clamped or when mipmap filtering is not in use.
	sd.MinLOD = 0.0f;
	sd.MaxLOD = (ss.lodclamp || !ss.UseMipmapFiltering()) ? 0.25f : FLT_MAX;
	sd.MaxAnisotropy = std::clamp<u8>(GSConfig.MaxAnisotropy, 1, 16);
	sd.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;

	if (!GetSamplerHeapManager().Allocate(cpu_handle))
		return false;

	m_device.get()->CreateSampler(&sd, *cpu_handle);
	m_samplers.emplace(ss.key, *cpu_handle);
	return true;
}
|
|
|
|
void GSDevice12::ClearSamplerCache()
|
|
{
|
|
ExecuteCommandList(false);
|
|
for (const auto& it : m_samplers)
|
|
m_sampler_heap_manager.Free(it.second.index);
|
|
m_samplers.clear();
|
|
InvalidateSamplerGroups();
|
|
InitializeSamplers();
|
|
|
|
m_utility_sampler_gpu = m_point_sampler_cpu;
|
|
m_tfx_samplers_handle_gpu.Clear();
|
|
m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS;
|
|
}
|
|
|
|
// Allocates `count` contiguous descriptors and copies the given CPU descriptors
// into them. Returns false if the allocation fails; gpu_handle receives the
// start of the allocated range.
bool GSDevice12::GetTextureGroupDescriptors(
	D3D12DescriptorHandle* gpu_handle, const D3D12DescriptorHandle* cpu_handles, u32 count)
{
	if (!GetDescriptorAllocator().Allocate(count, gpu_handle))
		return false;

	if (count != 1)
	{
		// Several single-descriptor source ranges are gathered into one destination range.
		D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = *gpu_handle;
		D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NUM_TFX_TEXTURES];
		UINT src_sizes[NUM_TFX_TEXTURES];
		pxAssert(count <= NUM_TFX_TEXTURES);
		for (u32 i = 0; i < count; i++)
		{
			src_handles[i] = cpu_handles[i];
			src_sizes[i] = 1;
		}
		m_device.get()->CopyDescriptors(
			1, &dst_handle, &count, count, src_handles, src_sizes, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
	}
	else
	{
		// Single descriptor: the simple copy is enough.
		m_device.get()->CopyDescriptorsSimple(
			1, *gpu_handle, cpu_handles[0], D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
	}

	return true;
}
|
|
|
|
// Declares the vertex layout shared by the utility pipelines: a float4 position
// at byte 0, a float2 texture coordinate at byte 16 and an RGBA8 colour at byte 28,
// drawn as triangles.
static void AddUtilityVertexAttributes(D3D12::GraphicsPipelineBuilder& gpb)
{
	constexpr u32 position_offset = 0;
	constexpr u32 texcoord_offset = 16;
	constexpr u32 color_offset = 28;

	gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, position_offset);
	gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, texcoord_offset);
	gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, color_offset);
	gpb.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
}
|
|
|
|
// Fetches (compiling if needed) a vertex shader from the shader cache, using a
// default-constructed ShaderMacro set.
GSDevice12::ComPtr<ID3DBlob> GSDevice12::GetUtilityVertexShader(const std::string& source, const char* entry_point)
{
	ShaderMacro default_macros;
	const auto* const macro_list = default_macros.GetPtr();
	return m_shader_cache.GetVertexShader(source, macro_list, entry_point);
}
|
|
|
|
// Fetches (compiling if needed) a pixel shader from the shader cache, using a
// default-constructed ShaderMacro set.
GSDevice12::ComPtr<ID3DBlob> GSDevice12::GetUtilityPixelShader(const std::string& source, const char* entry_point)
{
	ShaderMacro default_macros;
	const auto* const macro_list = default_macros.GetPtr();
	return m_shader_cache.GetPixelShader(source, macro_list, entry_point);
}
|
|
|
|
bool GSDevice12::CreateNullTexture()
|
|
{
|
|
m_null_texture =
|
|
GSTexture12::Create(GSTexture::Type::Texture, GSTexture::Format::Color, 1, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM,
|
|
DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN);
|
|
if (!m_null_texture)
|
|
return false;
|
|
|
|
m_null_texture->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
D3D12::SetObjectName(m_null_texture->GetResource(), "Null texture");
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CreateBuffers()
|
|
{
|
|
if (!m_vertex_stream_buffer.Create(VERTEX_BUFFER_SIZE))
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to allocate vertex buffer");
|
|
return false;
|
|
}
|
|
|
|
if (!m_index_stream_buffer.Create(INDEX_BUFFER_SIZE))
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to allocate index buffer");
|
|
return false;
|
|
}
|
|
|
|
if (!m_vertex_constant_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE))
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to allocate vertex uniform buffer");
|
|
return false;
|
|
}
|
|
|
|
if (!m_pixel_constant_buffer.Create(FRAGMENT_UNIFORM_BUFFER_SIZE))
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to allocate fragment uniform buffer");
|
|
return false;
|
|
}
|
|
|
|
if (!m_texture_stream_buffer.Create(TEXTURE_UPLOAD_BUFFER_SIZE))
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to allocate texture stream buffer");
|
|
return false;
|
|
}
|
|
|
|
if (!AllocatePreinitializedGPUBuffer(EXPAND_BUFFER_SIZE, &m_expand_index_buffer,
|
|
&m_expand_index_buffer_allocation, &GSDevice::GenerateExpansionIndexBuffer))
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to allocate expansion index buffer");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CreateRootSignatures()
|
|
{
|
|
D3D12::RootSignatureBuilder rsb;
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Convert Pipeline Layout
|
|
//////////////////////////////////////////////////////////////////////////
|
|
rsb.SetInputAssemblerFlag();
|
|
rsb.Add32BitConstants(0, CONVERT_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, NUM_UTILITY_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_UTILITY_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
|
if (!(m_utility_root_signature = rsb.Create()))
|
|
return false;
|
|
D3D12::SetObjectName(m_utility_root_signature.get(), "Convert root signature");
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Draw/TFX Pipeline Layout
|
|
//////////////////////////////////////////////////////////////////////////
|
|
rsb.SetInputAssemblerFlag();
|
|
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
|
|
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
|
|
rsb.AddSRVParameter(0, D3D12_SHADER_VISIBILITY_VERTEX);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
|
if (!(m_tfx_root_signature = rsb.Create()))
|
|
return false;
|
|
D3D12::SetObjectName(m_tfx_root_signature.get(), "TFX root signature");
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CompileConvertPipelines()
|
|
{
|
|
std::optional<std::string> shader = ReadShaderSource("shaders/dx11/convert.fx");
|
|
if (!shader)
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/convert.fx.");
|
|
return false;
|
|
}
|
|
|
|
m_convert_vs = GetUtilityVertexShader(*shader, "vs_main");
|
|
if (!m_convert_vs)
|
|
return false;
|
|
|
|
D3D12::GraphicsPipelineBuilder gpb;
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
AddUtilityVertexAttributes(gpb);
|
|
gpb.SetNoCullRasterizationState();
|
|
gpb.SetNoBlendingState();
|
|
gpb.SetVertexShader(m_convert_vs.get());
|
|
|
|
for (ShaderConvert i = ShaderConvert::COPY; i < ShaderConvert::Count; i = static_cast<ShaderConvert>(static_cast<int>(i) + 1))
|
|
{
|
|
const bool depth = HasDepthOutput(i);
|
|
const int index = static_cast<int>(i);
|
|
|
|
switch (i)
|
|
{
|
|
case ShaderConvert::RGBA8_TO_16_BITS:
|
|
case ShaderConvert::FLOAT32_TO_16_BITS:
|
|
{
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R16_UINT);
|
|
gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN);
|
|
}
|
|
break;
|
|
case ShaderConvert::FLOAT32_TO_32_BITS:
|
|
{
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R32_UINT);
|
|
gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN);
|
|
}
|
|
break;
|
|
case ShaderConvert::DATM_0:
|
|
case ShaderConvert::DATM_1:
|
|
case ShaderConvert::DATM_0_RTA_CORRECTION:
|
|
case ShaderConvert::DATM_1_RTA_CORRECTION:
|
|
{
|
|
gpb.ClearRenderTargets();
|
|
gpb.SetDepthStencilFormat(DXGI_FORMAT_D32_FLOAT_S8X24_UINT);
|
|
}
|
|
break;
|
|
default:
|
|
{
|
|
depth ? gpb.ClearRenderTargets() : gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetDepthStencilFormat(depth ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN);
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (IsDATMConvertShader(i))
|
|
{
|
|
const D3D12_DEPTH_STENCILOP_DESC sos = {
|
|
D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_REPLACE, D3D12_COMPARISON_FUNC_ALWAYS};
|
|
gpb.SetStencilState(true, 1, 1, sos, sos);
|
|
gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS);
|
|
}
|
|
else
|
|
{
|
|
gpb.SetDepthState(depth, depth, D3D12_COMPARISON_FUNC_ALWAYS);
|
|
gpb.SetNoStencilState();
|
|
}
|
|
|
|
gpb.SetColorWriteMask(0, ShaderConvertWriteMask(i));
|
|
|
|
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, shaderName(i)));
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetPixelShader(ps.get());
|
|
|
|
m_convert[index] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_convert[index])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_convert[index].get(), TinyString::from_format("Convert pipeline {}", static_cast<int>(i)));
|
|
|
|
if (i == ShaderConvert::COPY)
|
|
{
|
|
// compile color copy pipelines
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN);
|
|
for (u32 j = 0; j < 16; j++)
|
|
{
|
|
pxAssert(!m_color_copy[j]);
|
|
gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE,
|
|
D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast<u8>(j));
|
|
m_color_copy[j] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_color_copy[j])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_format("Color copy pipeline (r={}, g={}, b={}, a={})",
|
|
j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u));
|
|
}
|
|
}
|
|
else if (i == ShaderConvert::RTA_CORRECTION)
|
|
{
|
|
// compile color copy pipelines
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN);
|
|
for (u32 j = 16; j < 32; j++)
|
|
{
|
|
pxAssert(!m_color_copy[j]);
|
|
gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE,
|
|
D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast<u8>(j - 16));
|
|
m_color_copy[j] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_color_copy[j])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_format("Color copy pipeline (r={}, g={}, b={}, a={})",
|
|
j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u));
|
|
}
|
|
}
|
|
else if (i == ShaderConvert::COLCLIP_INIT || i == ShaderConvert::COLCLIP_RESOLVE)
|
|
{
|
|
const bool is_setup = i == ShaderConvert::COLCLIP_INIT;
|
|
std::array<ComPtr<ID3D12PipelineState>, 2>& arr = is_setup ? m_colclip_setup_pipelines : m_colclip_finish_pipelines;
|
|
for (u32 ds = 0; ds < 2; ds++)
|
|
{
|
|
pxAssert(!arr[ds]);
|
|
|
|
gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R16G16B16A16_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetDepthStencilFormat(ds ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN);
|
|
arr[ds] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!arr[ds])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(arr[ds].get(), TinyString::from_format("ColorClip {}/copy pipeline (ds={})", is_setup ? "setup" : "finish", ds));
|
|
}
|
|
}
|
|
}
|
|
|
|
for (u32 datm = 0; datm < 4; datm++)
|
|
{
|
|
const std::string entry_point(StringUtil::StdStringFromFormat("ps_stencil_image_init_%d", datm));
|
|
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, entry_point.c_str()));
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R32_FLOAT);
|
|
gpb.SetPixelShader(ps.get());
|
|
gpb.SetNoDepthTestState();
|
|
gpb.SetNoStencilState();
|
|
gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_BLEND_ZERO,
|
|
D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_RED);
|
|
|
|
for (u32 ds = 0; ds < 2; ds++)
|
|
{
|
|
gpb.SetDepthStencilFormat(ds ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN);
|
|
m_date_image_setup_pipelines[ds][datm] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_date_image_setup_pipelines[ds][datm])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_date_image_setup_pipelines[ds][datm].get(),
|
|
TinyString::from_format("DATE image clear pipeline (ds={}, datm={})", ds, (datm == 1 || datm == 3)));
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CompilePresentPipelines()
|
|
{
|
|
const std::optional<std::string> shader = ReadShaderSource("shaders/dx11/present.fx");
|
|
if (!shader)
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/present.fx.");
|
|
return false;
|
|
}
|
|
|
|
ComPtr<ID3DBlob> vs = GetUtilityVertexShader(*shader, "vs_main");
|
|
if (!vs)
|
|
return false;
|
|
|
|
D3D12::GraphicsPipelineBuilder gpb;
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
AddUtilityVertexAttributes(gpb);
|
|
gpb.SetNoCullRasterizationState();
|
|
gpb.SetNoBlendingState();
|
|
gpb.SetVertexShader(vs.get());
|
|
gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS);
|
|
gpb.SetNoStencilState();
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
|
|
for (PresentShader i = PresentShader::COPY; i < PresentShader::Count; i = static_cast<PresentShader>(static_cast<int>(i) + 1))
|
|
{
|
|
const int index = static_cast<int>(i);
|
|
|
|
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, shaderName(i)));
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetPixelShader(ps.get());
|
|
|
|
m_present[index] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_present[index])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_present[index].get(), TinyString::from_format("Present pipeline {}", static_cast<int>(i)));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CompileInterlacePipelines()
|
|
{
|
|
const std::optional<std::string> source = ReadShaderSource("shaders/dx11/interlace.fx");
|
|
if (!source)
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/interlace.fx.");
|
|
return false;
|
|
}
|
|
|
|
D3D12::GraphicsPipelineBuilder gpb;
|
|
AddUtilityVertexAttributes(gpb);
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
gpb.SetNoCullRasterizationState();
|
|
gpb.SetNoDepthTestState();
|
|
gpb.SetNoBlendingState();
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetVertexShader(m_convert_vs.get());
|
|
|
|
for (int i = 0; i < static_cast<int>(m_interlace.size()); i++)
|
|
{
|
|
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*source, StringUtil::StdStringFromFormat("ps_main%d", i).c_str()));
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetPixelShader(ps.get());
|
|
|
|
m_interlace[i] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_interlace[i])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_interlace[i].get(), TinyString::from_format("Interlace pipeline {}", static_cast<int>(i)));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CompileMergePipelines()
|
|
{
|
|
const std::optional<std::string> shader = ReadShaderSource("shaders/dx11/merge.fx");
|
|
if (!shader)
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/merge.fx.");
|
|
return false;
|
|
}
|
|
|
|
D3D12::GraphicsPipelineBuilder gpb;
|
|
AddUtilityVertexAttributes(gpb);
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
gpb.SetNoCullRasterizationState();
|
|
gpb.SetNoDepthTestState();
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetVertexShader(m_convert_vs.get());
|
|
|
|
for (int i = 0; i < static_cast<int>(m_merge.size()); i++)
|
|
{
|
|
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, StringUtil::StdStringFromFormat("ps_main%d", i).c_str()));
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetPixelShader(ps.get());
|
|
gpb.SetBlendState(0, true, D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
|
|
D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD);
|
|
|
|
m_merge[i] = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_merge[i])
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_merge[i].get(), TinyString::from_format("Merge pipeline {}", i));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::CompilePostProcessingPipelines()
|
|
{
|
|
D3D12::GraphicsPipelineBuilder gpb;
|
|
AddUtilityVertexAttributes(gpb);
|
|
gpb.SetRootSignature(m_utility_root_signature.get());
|
|
gpb.SetNoCullRasterizationState();
|
|
gpb.SetNoDepthTestState();
|
|
gpb.SetNoBlendingState();
|
|
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
|
|
gpb.SetVertexShader(m_convert_vs.get());
|
|
|
|
{
|
|
const std::optional<std::string> shader = ReadShaderSource("shaders/common/fxaa.fx");
|
|
if (!shader)
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to read shaders/common/fxaa.fx.");
|
|
return false;
|
|
}
|
|
|
|
ShaderMacro sm;
|
|
sm.AddMacro("FXAA_HLSL", "1");
|
|
ComPtr<ID3DBlob> ps = m_shader_cache.GetPixelShader(*shader, sm.GetPtr());
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetPixelShader(ps.get());
|
|
|
|
m_fxaa_pipeline = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_fxaa_pipeline)
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_fxaa_pipeline.get(), "FXAA pipeline");
|
|
}
|
|
|
|
{
|
|
const std::optional<std::string> shader = ReadShaderSource("shaders/dx11/shadeboost.fx");
|
|
if (!shader)
|
|
{
|
|
Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/shadeboost.fx.");
|
|
return false;
|
|
}
|
|
|
|
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, "ps_main"));
|
|
if (!ps)
|
|
return false;
|
|
|
|
gpb.SetPixelShader(ps.get());
|
|
|
|
m_shadeboost_pipeline = gpb.Create(m_device.get(), m_shader_cache, false);
|
|
if (!m_shadeboost_pipeline)
|
|
return false;
|
|
|
|
D3D12::SetObjectName(m_shadeboost_pipeline.get(), "Shadeboost pipeline");
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void GSDevice12::DestroyResources()
|
|
{
|
|
m_convert_vs.reset();
|
|
|
|
m_cas_sharpen_pipeline.reset();
|
|
m_cas_upscale_pipeline.reset();
|
|
m_cas_root_signature.reset();
|
|
|
|
m_tfx_pipelines.clear();
|
|
m_tfx_pixel_shaders.clear();
|
|
m_tfx_vertex_shaders.clear();
|
|
m_interlace = {};
|
|
m_merge = {};
|
|
m_color_copy = {};
|
|
m_present = {};
|
|
m_convert = {};
|
|
m_colclip_setup_pipelines = {};
|
|
m_colclip_finish_pipelines = {};
|
|
m_date_image_setup_pipelines = {};
|
|
m_fxaa_pipeline.reset();
|
|
m_shadeboost_pipeline.reset();
|
|
m_imgui_pipeline.reset();
|
|
|
|
for (const auto& it : m_samplers)
|
|
{
|
|
if (it.second)
|
|
m_sampler_heap_manager.Free(it.second.index);
|
|
}
|
|
m_samplers.clear();
|
|
InvalidateSamplerGroups();
|
|
|
|
m_expand_index_buffer.reset();
|
|
m_expand_index_buffer_allocation.reset();
|
|
m_texture_stream_buffer.Destroy(false);
|
|
m_pixel_constant_buffer.Destroy(false);
|
|
m_vertex_constant_buffer.Destroy(false);
|
|
m_index_stream_buffer.Destroy(false);
|
|
m_vertex_stream_buffer.Destroy(false);
|
|
|
|
m_utility_root_signature.reset();
|
|
m_tfx_root_signature.reset();
|
|
|
|
if (m_null_texture)
|
|
{
|
|
m_null_texture->Destroy(false);
|
|
m_null_texture.reset();
|
|
}
|
|
|
|
m_shader_cache.Close();
|
|
|
|
m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
|
|
m_timestamp_query_buffer.reset();
|
|
m_timestamp_query_allocation.reset();
|
|
m_sampler_heap_manager.Destroy();
|
|
m_dsv_heap_manager.Destroy();
|
|
m_rtv_heap_manager.Destroy();
|
|
m_descriptor_heap_manager.Destroy();
|
|
m_command_lists = {};
|
|
m_current_command_list = 0;
|
|
m_completed_fence_value = 0;
|
|
m_current_fence_value = 0;
|
|
if (m_fence_event)
|
|
{
|
|
CloseHandle(m_fence_event);
|
|
m_fence_event = {};
|
|
}
|
|
|
|
m_allocator.reset();
|
|
m_command_queue.reset();
|
|
m_device.reset();
|
|
}
|
|
|
|
// Returns the TFX vertex shader blob for `sel`, compiling and caching it on first use.
// The compile result is cached as-is, so the returned pointer is whatever the shader
// cache produced for this selector.
const ID3DBlob* GSDevice12::GetTFXVertexShader(GSHWDrawConfig::VSSelector sel)
{
	if (const auto cached = m_tfx_vertex_shaders.find(sel.key); cached != m_tfx_vertex_shaders.end())
		return cached->second.get();

	// Translate the selector fields into preprocessor defines.
	ShaderMacro macros;
	macros.AddMacro("VERTEX_SHADER", 1);
	macros.AddMacro("VS_TME", sel.tme);
	macros.AddMacro("VS_FST", sel.fst);
	macros.AddMacro("VS_IIP", sel.iip);
	macros.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));

	// Expanding selectors use a dedicated entry point.
	const bool expands = (sel.expand != GSHWDrawConfig::VSExpand::None);
	const char* const entry_point = expands ? "vs_main_expand" : "vs_main";

	ComPtr<ID3DBlob> blob(m_shader_cache.GetVertexShader(m_tfx_source, macros.GetPtr(), entry_point));
	const auto inserted = m_tfx_vertex_shaders.emplace(sel.key, std::move(blob)).first;
	return inserted->second.get();
}
|
|
|
|
// Returns the TFX pixel shader blob for `sel`, compiling and caching it on first use.
// Each selector field is passed to the shader source as a PS_* preprocessor define.
const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sel)
{
	if (const auto cached = m_tfx_pixel_shaders.find(sel); cached != m_tfx_pixel_shaders.end())
		return cached->second.get();

	ShaderMacro macros;
	macros.AddMacro("PIXEL_SHADER", 1);

	// Texture addressing / sampling
	macros.AddMacro("PS_FST", sel.fst);
	macros.AddMacro("PS_WMS", sel.wms);
	macros.AddMacro("PS_WMT", sel.wmt);
	macros.AddMacro("PS_ADJS", sel.adjs);
	macros.AddMacro("PS_ADJT", sel.adjt);
	macros.AddMacro("PS_AEM_FMT", sel.aem_fmt);
	macros.AddMacro("PS_AEM", sel.aem);
	macros.AddMacro("PS_TFX", sel.tfx);
	macros.AddMacro("PS_TCC", sel.tcc);

	// Tests, fog, shading
	macros.AddMacro("PS_DATE", sel.date);
	macros.AddMacro("PS_ATST", sel.atst);
	macros.AddMacro("PS_AFAIL", sel.afail);
	macros.AddMacro("PS_FOG", sel.fog);
	macros.AddMacro("PS_IIP", sel.iip);
	macros.AddMacro("PS_BLEND_HW", sel.blend_hw);
	macros.AddMacro("PS_A_MASKED", sel.a_masked);
	macros.AddMacro("PS_FBA", sel.fba);
	macros.AddMacro("PS_FBMASK", sel.fbmask);
	macros.AddMacro("PS_LTF", sel.ltf);
	macros.AddMacro("PS_TCOFFSETHACK", sel.tcoffsethack);
	macros.AddMacro("PS_POINT_SAMPLER", sel.point_sampler);
	macros.AddMacro("PS_REGION_RECT", sel.region_rect);

	// Shuffle / channel handling
	macros.AddMacro("PS_SHUFFLE", sel.shuffle);
	macros.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
	macros.AddMacro("PS_PROCESS_BA", sel.process_ba);
	macros.AddMacro("PS_PROCESS_RG", sel.process_rg);
	macros.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
	macros.AddMacro("PS_READ16_SRC", sel.real16src);
	macros.AddMacro("PS_WRITE_RG", sel.write_rg);
	macros.AddMacro("PS_CHANNEL_FETCH", sel.channel);
	macros.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
	macros.AddMacro("PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);

	// Formats
	macros.AddMacro("PS_DST_FMT", sel.dst_fmt);
	macros.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
	macros.AddMacro("PS_PAL_FMT", sel.pal_fmt);

	// Colour clip / RTA / blending
	macros.AddMacro("PS_COLCLIP_HW", sel.colclip_hw);
	macros.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
	macros.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
	macros.AddMacro("PS_COLCLIP", sel.colclip);
	macros.AddMacro("PS_BLEND_A", sel.blend_a);
	macros.AddMacro("PS_BLEND_B", sel.blend_b);
	macros.AddMacro("PS_BLEND_C", sel.blend_c);
	macros.AddMacro("PS_BLEND_D", sel.blend_d);
	macros.AddMacro("PS_BLEND_MIX", sel.blend_mix);
	macros.AddMacro("PS_ROUND_INV", sel.round_inv);
	macros.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a);
	macros.AddMacro("PS_PABE", sel.pabe);

	// Dithering, depth, misc
	macros.AddMacro("PS_DITHER", sel.dither);
	macros.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust);
	macros.AddMacro("PS_ZCLAMP", sel.zclamp);
	macros.AddMacro("PS_ZFLOOR", sel.zfloor);
	macros.AddMacro("PS_SCANMSK", sel.scanmsk);
	macros.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod);
	macros.AddMacro("PS_MANUAL_LOD", sel.manual_lod);
	macros.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
	macros.AddMacro("PS_NO_COLOR", sel.no_color);
	macros.AddMacro("PS_NO_COLOR1", sel.no_color1);

	ComPtr<ID3DBlob> blob(m_shader_cache.GetPixelShader(m_tfx_source, macros.GetPtr(), "ps_main"));
	const auto inserted = m_tfx_pixel_shaders.emplace(sel, std::move(blob)).first;
	return inserted->second.get();
}
|
|
|
|
// Builds the graphics pipeline described by selector `p` (shaders, input layout,
// render target / depth formats, depth-stencil and blend state).
// Returns a null ComPtr if either shader is unavailable or pipeline creation fails.
GSDevice12::ComPtr<ID3D12PipelineState> GSDevice12::CreateTFXPipeline(const PipelineSelector& p)
{
	// Indexed by p.topology.
	static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY_TYPE, 3> topology_types = {{
		D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, // Point
		D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, // Line
		D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // Triangle
	}};

	// Local copies so the selector itself is left untouched.
	GSHWDrawConfig::BlendState blend{p.bs};
	GSHWDrawConfig::PSSelector ps_selector{p.ps};
	if (!p.bs.IsEffective(p.cms))
	{
		// Blending has no effect with this colour mask: drop it and tell the
		// pixel shader not to produce the second colour output.
		blend = {};
		ps_selector.no_color1 = true;
	}

	const ID3DBlob* const vertex_shader = GetTFXVertexShader(p.vs);
	const ID3DBlob* const pixel_shader = GetTFXPixelShader(ps_selector);
	if (vertex_shader == nullptr || pixel_shader == nullptr)
		return nullptr;

	const bool primid_init = IsDATEModePrimIDInit(p.ps.date);

	// Common state
	D3D12::GraphicsPipelineBuilder gpb;
	gpb.SetRootSignature(m_tfx_root_signature.get());
	gpb.SetPrimitiveTopologyType(topology_types[p.topology]);
	gpb.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false);

	// Render target / depth formats
	if (p.rt)
	{
		GSTexture::Format rt_format = GSTexture::Format::Color;
		if (primid_init)
			rt_format = GSTexture::Format::PrimID;
		else if (p.ps.colclip_hw)
			rt_format = GSTexture::Format::ColorClip;

		DXGI_FORMAT native_format;
		LookupNativeFormat(rt_format, nullptr, nullptr, &native_format, nullptr);
		gpb.SetRenderTarget(0, native_format);
	}
	if (p.ds)
		gpb.SetDepthStencilFormat(DXGI_FORMAT_D32_FLOAT_S8X24_UINT);

	// Shaders
	gpb.SetVertexShader(vertex_shader);
	gpb.SetPixelShader(pixel_shader);

	// Input layout: only declared when the vertex shader does not expand primitives.
	if (p.vs.expand == GSHWDrawConfig::VSExpand::None)
	{
		gpb.AddVertexAttribute("TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0);
		gpb.AddVertexAttribute("COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8);
		gpb.AddVertexAttribute("TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12);
		gpb.AddVertexAttribute("POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16);
		gpb.AddVertexAttribute("POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20);
		gpb.AddVertexAttribute("TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24);
		gpb.AddVertexAttribute("COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28);
	}

	// Depth / stencil
	if (!p.ds)
	{
		gpb.SetNoDepthTestState();
	}
	else
	{
		// Indexed by p.dss.ztst.
		static constexpr std::array<D3D12_COMPARISON_FUNC, 4> depth_funcs = {{
			D3D12_COMPARISON_FUNC_NEVER,
			D3D12_COMPARISON_FUNC_ALWAYS,
			D3D12_COMPARISON_FUNC_GREATER_EQUAL,
			D3D12_COMPARISON_FUNC_GREATER,
		}};

		// Depth stays enabled whenever the test can reject or depth is written.
		const bool depth_enabled = (p.dss.ztst != ZTST_ALWAYS || p.dss.zwe);
		gpb.SetDepthState(depth_enabled, p.dss.zwe, depth_funcs[p.dss.ztst]);

		if (p.dss.date)
		{
			// Pass only where stencil equals the reference; with date_one the
			// stencil value is zeroed on pass.
			const D3D12_STENCIL_OP pass_op = p.dss.date_one ? D3D12_STENCIL_OP_ZERO : D3D12_STENCIL_OP_KEEP;
			const D3D12_DEPTH_STENCILOP_DESC stencil_ops{
				D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, pass_op, D3D12_COMPARISON_FUNC_EQUAL};
			gpb.SetStencilState(true, 1, 1, stencil_ops, stencil_ops);
		}
	}

	// Blending
	if (primid_init)
	{
		// image DATE prepass: MIN-blend into the red channel only.
		gpb.SetBlendState(0, true, D3D12_BLEND_ONE, D3D12_BLEND_ONE, D3D12_BLEND_OP_MIN, D3D12_BLEND_ONE,
			D3D12_BLEND_ONE, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_RED);
	}
	else if (!blend.enable)
	{
		gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE,
			D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, p.cms.wrgba);
	}
	else
	{
		// clang-format off
		// Indexed by the selector's blend factor / blend op fields.
		static constexpr std::array<D3D12_BLEND, 16> d3d_blend_factors = { {
			D3D12_BLEND_SRC_COLOR, D3D12_BLEND_INV_SRC_COLOR, D3D12_BLEND_DEST_COLOR, D3D12_BLEND_INV_DEST_COLOR,
			D3D12_BLEND_SRC1_COLOR, D3D12_BLEND_INV_SRC1_COLOR, D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA,
			D3D12_BLEND_DEST_ALPHA, D3D12_BLEND_INV_DEST_ALPHA, D3D12_BLEND_SRC1_ALPHA, D3D12_BLEND_INV_SRC1_ALPHA,
			D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_BLEND_FACTOR, D3D12_BLEND_ONE, D3D12_BLEND_ZERO
		} };
		static constexpr std::array<D3D12_BLEND_OP, 3> d3d_blend_ops = { {
			D3D12_BLEND_OP_ADD, D3D12_BLEND_OP_SUBTRACT, D3D12_BLEND_OP_REV_SUBTRACT
		} };
		// clang-format on

		gpb.SetBlendState(0, true, d3d_blend_factors[blend.src_factor], d3d_blend_factors[blend.dst_factor],
			d3d_blend_ops[blend.op], d3d_blend_factors[blend.src_factor_alpha], d3d_blend_factors[blend.dst_factor_alpha],
			D3D12_BLEND_OP_ADD, p.cms.wrgba);
	}

	ComPtr<ID3D12PipelineState> pipeline(gpb.Create(m_device.get(), m_shader_cache));
	if (pipeline)
	{
		// The debug name uses the selector's original keys.
		D3D12::SetObjectName(
			pipeline.get(), TinyString::from_format("TFX Pipeline {:08X}/{:08X}{:016X}", p.vs.key, p.ps.key_hi, p.ps.key_lo));
	}

	return pipeline;
}
|
|
|
|
// Returns the cached pipeline for `p`, creating and caching it on a miss.
// The result of CreateTFXPipeline is stored as-is, including an empty one.
const ID3D12PipelineState* GSDevice12::GetTFXPipeline(const PipelineSelector& p)
{
	if (const auto cached = m_tfx_pipelines.find(p); cached != m_tfx_pipelines.end())
		return cached->second.get();

	ComPtr<ID3D12PipelineState> created(CreateTFXPipeline(p));
	const auto inserted = m_tfx_pipelines.emplace(p, std::move(created)).first;
	return inserted->second.get();
}
|
|
|
|
bool GSDevice12::BindDrawPipeline(const PipelineSelector& p)
|
|
{
|
|
const ID3D12PipelineState* pipeline = GetTFXPipeline(p);
|
|
if (!pipeline)
|
|
return false;
|
|
|
|
SetPipeline(pipeline);
|
|
|
|
return ApplyTFXState();
|
|
}
|
|
|
|
void GSDevice12::InitializeState()
|
|
{
|
|
for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++)
|
|
m_tfx_textures[i] = m_null_texture->GetSRVDescriptor();
|
|
m_tfx_sampler_sel = GSHWDrawConfig::SamplerSelector::Point().key;
|
|
|
|
InvalidateCachedState();
|
|
}
|
|
|
|
void GSDevice12::InitializeSamplers()
|
|
{
|
|
bool result = GetSampler(&m_point_sampler_cpu, GSHWDrawConfig::SamplerSelector::Point());
|
|
result = result && GetSampler(&m_linear_sampler_cpu, GSHWDrawConfig::SamplerSelector::Linear());
|
|
result = result && GetSampler(&m_tfx_sampler, m_tfx_sampler_sel);
|
|
|
|
if (!result)
|
|
pxFailRel("Failed to initialize samplers");
|
|
}
|
|
|
|
// Maps the (wait, spin) flag pair onto a WaitType:
// no wait -> None; wait + spin -> Spin; wait without spin -> Sleep.
GSDevice12::WaitType GSDevice12::GetWaitType(bool wait, bool spin)
{
	if (wait)
		return spin ? WaitType::Spin : WaitType::Sleep;

	return WaitType::None;
}
|
|
|
|
void GSDevice12::ExecuteCommandList(bool wait_for_completion)
|
|
{
|
|
EndRenderPass();
|
|
ExecuteCommandList(GetWaitType(wait_for_completion, GSConfig.HWSpinCPUForReadbacks));
|
|
InvalidateCachedState();
|
|
}
|
|
|
|
void GSDevice12::ExecuteCommandList(bool wait_for_completion, const char* reason, ...)
|
|
{
|
|
std::va_list ap;
|
|
va_start(ap, reason);
|
|
const std::string reason_str(StringUtil::StdStringFromFormatV(reason, ap));
|
|
va_end(ap);
|
|
|
|
Console.Warning("D3D12: Executing command buffer due to '%s'", reason_str.c_str());
|
|
ExecuteCommandList(wait_for_completion);
|
|
}
|
|
|
|
void GSDevice12::ExecuteCommandListAndRestartRenderPass(bool wait_for_completion, const char* reason)
|
|
{
|
|
Console.Warning("D3D12: Executing command buffer due to '%s'", reason);
|
|
|
|
const bool was_in_render_pass = m_in_render_pass;
|
|
EndRenderPass();
|
|
ExecuteCommandList(GetWaitType(wait_for_completion, GSConfig.HWSpinCPUForReadbacks));
|
|
InvalidateCachedState();
|
|
|
|
if (was_in_render_pass)
|
|
{
|
|
// rebind everything except RT, because the RP does that for us
|
|
ApplyBaseState(m_dirty_flags & ~DIRTY_FLAG_RENDER_TARGET, GetCommandList().list4.get());
|
|
m_dirty_flags &= ~DIRTY_BASE_STATE;
|
|
|
|
// restart render pass
|
|
BeginRenderPass(m_current_render_target ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE :
|
|
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
|
|
m_current_render_target ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
|
|
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
m_current_depth_target ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE :
|
|
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
|
|
m_current_depth_target ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
|
|
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS);
|
|
}
|
|
}
|
|
|
|
void GSDevice12::ExecuteCommandListForReadback()
|
|
{
|
|
ExecuteCommandList(true);
|
|
}
|
|
|
|
void GSDevice12::InvalidateCachedState()
|
|
{
|
|
m_dirty_flags |= DIRTY_BASE_STATE | DIRTY_TFX_STATE | DIRTY_UTILITY_STATE | DIRTY_CONSTANT_BUFFER_STATE;
|
|
m_current_root_signature = RootSignature::Undefined;
|
|
m_utility_texture_cpu.Clear();
|
|
m_utility_texture_gpu.Clear();
|
|
m_utility_sampler_cpu.Clear();
|
|
m_utility_sampler_gpu.Clear();
|
|
m_tfx_textures_handle_gpu.Clear();
|
|
m_tfx_samplers_handle_gpu.Clear();
|
|
m_tfx_rt_textures_handle_gpu.Clear();
|
|
}
|
|
|
|
// Records a new vertex buffer view and flags it dirty; does nothing if the
// location, size and stride all match the current view.
void GSDevice12::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS buffer, size_t size, size_t stride)
{
	const bool unchanged = m_vertex_buffer.BufferLocation == buffer &&
	                       m_vertex_buffer.SizeInBytes == size &&
	                       m_vertex_buffer.StrideInBytes == stride;
	if (unchanged)
		return;

	m_vertex_buffer.BufferLocation = buffer;
	m_vertex_buffer.SizeInBytes = size;
	m_vertex_buffer.StrideInBytes = stride;
	m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER;
}
|
|
|
|
// Records a new index buffer view and flags it dirty; does nothing if the
// location, size and index format all match the current view.
void GSDevice12::SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS buffer, size_t size, DXGI_FORMAT type)
{
	const bool unchanged = m_index_buffer.BufferLocation == buffer &&
	                       m_index_buffer.SizeInBytes == size &&
	                       m_index_buffer.Format == type;
	if (unchanged)
		return;

	m_index_buffer.BufferLocation = buffer;
	m_index_buffer.SizeInBytes = size;
	m_index_buffer.Format = type;
	m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER;
}
|
|
|
|
// Stores the primitive topology and flags it dirty when it differs from the current one.
void GSDevice12::SetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY topology)
{
	if (m_primitive_topology != topology)
	{
		m_primitive_topology = topology;
		m_dirty_flags |= DIRTY_FLAG_PRIMITIVE_TOPOLOGY;
	}
}
|
|
|
|
// Stores the blend constant colour and flags it dirty when it differs from the current one.
void GSDevice12::SetBlendConstants(u8 color)
{
	if (m_blend_constant_color != color)
	{
		m_blend_constant_color = color;
		m_dirty_flags |= DIRTY_FLAG_BLEND_CONSTANTS;
	}
}
|
|
|
|
// Stores the stencil reference value and flags it dirty when it differs from the current one.
void GSDevice12::SetStencilRef(u8 ref)
{
	if (m_stencil_ref != ref)
	{
		m_stencil_ref = ref;
		m_dirty_flags |= DIRTY_FLAG_STENCIL_REF;
	}
}
|
|
|
|
void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state, bool feedback)
|
|
{
|
|
D3D12DescriptorHandle handle;
|
|
if (sr)
|
|
{
|
|
GSTexture12* dtex = static_cast<GSTexture12*>(sr);
|
|
if (check_state)
|
|
{
|
|
if (dtex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource && InRenderPass())
|
|
{
|
|
GL_INS("Ending render pass due to resource transition");
|
|
EndRenderPass();
|
|
}
|
|
|
|
dtex->CommitClear();
|
|
dtex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
}
|
|
dtex->SetUseFenceCounter(GetCurrentFenceValue());
|
|
handle = (feedback && !m_enhanced_barriers) ? dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor();
|
|
}
|
|
else
|
|
{
|
|
handle = m_null_texture->GetSRVDescriptor();
|
|
}
|
|
|
|
if (m_tfx_textures[i] == handle)
|
|
return;
|
|
|
|
m_tfx_textures[i] = handle;
|
|
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
|
|
}
|
|
|
|
// Switches the TFX sampler to the one described by `sel` and flags the sampler
// table dirty; does nothing when `sel` is already current.
void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
{
	if (m_tfx_sampler_sel != sel.key)
	{
		// NOTE(review): the result of GetSampler() is not checked here.
		GetSampler(&m_tfx_sampler, sel);
		m_tfx_sampler_sel = sel.key;
		m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS;
	}
}
|
|
|
|
void GSDevice12::SetUtilityRootSignature()
|
|
{
|
|
if (m_current_root_signature == RootSignature::Utility)
|
|
return;
|
|
|
|
m_current_root_signature = RootSignature::Utility;
|
|
m_dirty_flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_PIPELINE;
|
|
GetCommandList().list4->SetGraphicsRootSignature(m_utility_root_signature.get());
|
|
}
|
|
|
|
// Sets the texture and sampler used by utility draws.
// `dtex` may be null, in which case the null texture's SRV is used. A non-null texture has any
// pending clear committed and is transitioned to the pixel shader resource state.
// If either GPU descriptor cannot be obtained, the command list is executed and the whole
// operation is retried from the start.
void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle& sampler)
{
	D3D12DescriptorHandle srv;
	if (!dtex)
	{
		srv = m_null_texture->GetSRVDescriptor();
	}
	else
	{
		GSTexture12* const tex12 = static_cast<GSTexture12*>(dtex);
		tex12->CommitClear();
		tex12->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
		tex12->SetUseFenceCounter(GetCurrentFenceValue());
		srv = tex12->GetSRVDescriptor();
	}

	// Texture descriptor: only touched when the CPU-side handle differs from the cached one.
	const bool texture_changed = (m_utility_texture_cpu != srv);
	if (texture_changed)
	{
		m_utility_texture_cpu = srv;
		m_dirty_flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE;

		const bool allocated = GetTextureGroupDescriptors(&m_utility_texture_gpu, &srv, 1);
		if (!allocated)
		{
			ExecuteCommandListAndRestartRenderPass(false, "Ran out of utility texture descriptors");
			SetUtilityTexture(dtex, sampler);
			return;
		}
	}

	// Sampler descriptor: same caching scheme as the texture above.
	const bool sampler_changed = (m_utility_sampler_cpu != sampler);
	if (sampler_changed)
	{
		m_utility_sampler_cpu = sampler;
		m_dirty_flags |= DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE;

		const bool found = GetSamplerAllocator().LookupSingle(&m_utility_sampler_gpu, sampler);
		if (!found)
		{
			ExecuteCommandListAndRestartRenderPass(false, "Ran out of utility sampler descriptors");
			SetUtilityTexture(dtex, sampler);
			return;
		}
	}
}
|
|
|
|
void GSDevice12::SetUtilityPushConstants(const void* data, u32 size)
|
|
{
|
|
GetCommandList().list4->SetGraphicsRoot32BitConstants(
|
|
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, (size + 3) / sizeof(u32), data, 0);
|
|
}
|
|
|
|
// Removes every reference this device holds to `tex`:
//  - any TFX texture slot bound to its SRV or FBL descriptor is reset to the null texture,
//  - if it is the current render or depth target, the render pass is ended and the target cleared.
void GSDevice12::UnbindTexture(GSTexture12* tex)
{
	for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++)
	{
		if (m_tfx_textures[i] == tex->GetSRVDescriptor() || m_tfx_textures[i] == tex->GetFBLDescriptor())
		{
			m_tfx_textures[i] = m_null_texture->GetSRVDescriptor();

			// Use the same slot -> dirty flag mapping as PSSetShaderResource(): slots 0-1 belong to
			// the main texture group, the rest to the RT texture group. Flagging the wrong group
			// would leave the stale descriptor in the GPU-visible table.
			m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
		}
	}

	if (m_current_render_target == tex)
	{
		EndRenderPass();
		m_current_render_target = nullptr;
	}

	if (m_current_depth_target == tex)
	{
		EndRenderPass();
		m_current_depth_target = nullptr;
		m_current_depth_read_only = false;
	}
}
|
|
|
|
// Renders mip level `dst_level` of `texture` by drawing a full-target quad that samples mip level
// `src_level` of the same texture, using the linear sampler and the COPY convert pipeline.
// `dst_width`/`dst_height` give the size of the destination level. `src_width`/`src_height` are
// not used by this implementation.
// Temporary RTV/SRV descriptors are created for the two levels and released (deferred) afterwards.
void GSDevice12::RenderTextureMipmap(
	GSTexture12* texture, u32 dst_level, u32 dst_width, u32 dst_height, u32 src_level, u32 src_width, u32 src_height)
{
	EndRenderPass();

	// we need a temporary SRV and RTV for each mip level
	// Safe to use the init buffer after exec, because everything will be done with the texture.
	D3D12DescriptorHandle rtv_handle;
	while (!GetRTVHeapManager().Allocate(&rtv_handle))
		ExecuteCommandList(false);

	D3D12DescriptorHandle srv_handle;
	while (!GetDescriptorHeapManager().Allocate(&srv_handle))
		ExecuteCommandList(false);

	// Setup views. This will be a partial view for the SRV.
	D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {texture->GetDXGIFormat(), D3D12_RTV_DIMENSION_TEXTURE2D};
	rtv_desc.Texture2D = {dst_level, 0u};
	m_device.get()->CreateRenderTargetView(texture->GetResource(), &rtv_desc, rtv_handle);

	D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {
		texture->GetDXGIFormat(), D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
	srv_desc.Texture2D = {src_level, 1u, 0u, 0.0f};
	m_device.get()->CreateShaderResourceView(texture->GetResource(), &srv_desc, srv_handle);

	// We need to set the descriptors up manually, because we're not going through GSTexture.
	// When either lookup fails we flush the command list and start over, instead of carrying on
	// with a handle the failed call did not provide.
	// NOTE(review): the texture descriptor is re-requested after a sampler-triggered flush on the
	// assumption that handles obtained before the flush may not be usable afterwards - confirm.
	for (;;)
	{
		if (!GetTextureGroupDescriptors(&m_utility_texture_gpu, &srv_handle, 1))
		{
			ExecuteCommandList(false);
			continue;
		}

		if (m_utility_sampler_cpu != m_linear_sampler_cpu)
		{
			m_dirty_flags |= DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE;
			if (!GetSamplerAllocator().LookupSingle(&m_utility_sampler_gpu, m_linear_sampler_cpu))
			{
				ExecuteCommandList(false);
				continue;
			}

			// Keep the CPU-side cache in step with the GPU handle, as SetUtilityTexture() does.
			m_utility_sampler_cpu = m_linear_sampler_cpu;
		}

		break;
	}

	// The utility texture table now points at our temporary SRV. Make sure it is actually bound
	// by the next ApplyUtilityState(), and drop the cached CPU handle so that a later
	// SetUtilityTexture() cannot mistake the temporary view for the texture it had cached.
	m_dirty_flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE;
	m_utility_texture_cpu = D3D12DescriptorHandle{};

	// *now* we don't have to worry about running out of anything.
	const D3D12CommandList& cmdlist = GetCommandList();
	if (texture->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
		texture->TransitionSubresourceToState(
			cmdlist, src_level, texture->GetResourceState(), GSTexture12::ResourceState::PixelShaderResource);
	if (texture->GetResourceState() != GSTexture12::ResourceState::RenderTarget)
		texture->TransitionSubresourceToState(
			cmdlist, dst_level, texture->GetResourceState(), GSTexture12::ResourceState::RenderTarget);

	// We set the state directly here.
	constexpr u32 MODIFIED_STATE = DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET;
	m_dirty_flags &= ~MODIFIED_STATE;

	// Using a render pass is probably a bit overkill.
	const D3D12_DISCARD_REGION discard_region = {0u, nullptr, dst_level, 1u};
	cmdlist.list4->DiscardResource(texture->GetResource(), &discard_region);
	cmdlist.list4->OMSetRenderTargets(1, &rtv_handle.cpu_handle, FALSE, nullptr);

	const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast<float>(dst_width), static_cast<float>(dst_height), 0.0f, 1.0f};
	cmdlist.list4->RSSetViewports(1, &vp);

	const D3D12_RECT scissor = {0, 0, static_cast<LONG>(dst_width), static_cast<LONG>(dst_height)};
	cmdlist.list4->RSSetScissorRects(1, &scissor);

	SetUtilityRootSignature();
	SetPipeline(m_convert[static_cast<int>(ShaderConvert::COPY)].get());
	DrawStretchRect(GSVector4(0.0f, 0.0f, 1.0f, 1.0f),
		GSVector4(0.0f, 0.0f, static_cast<float>(dst_width), static_cast<float>(dst_height)),
		GSVector2i(dst_width, dst_height));

	// Put both subresources back into the state the texture object believes it is in.
	if (texture->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
		texture->TransitionSubresourceToState(
			cmdlist, src_level, GSTexture12::ResourceState::PixelShaderResource, texture->GetResourceState());
	if (texture->GetResourceState() != GSTexture12::ResourceState::RenderTarget)
		texture->TransitionSubresourceToState(
			cmdlist, dst_level, GSTexture12::ResourceState::RenderTarget, texture->GetResourceState());

	// Must destroy after current cmdlist.
	DeferDescriptorDestruction(m_descriptor_heap_manager, &srv_handle);
	DeferDescriptorDestruction(m_rtv_heap_manager, &rtv_handle);

	// Restore for next normal draw.
	m_dirty_flags |= MODIFIED_STATE;
}
|
|
|
|
bool GSDevice12::InRenderPass()
|
|
{
|
|
return m_in_render_pass;
|
|
}
|
|
|
|
// Opens a render pass on the current render/depth targets, ending any pass that is already open.
// The *_begin / *_end arguments select the beginning and ending access types for the colour,
// depth and stencil attachments. `clear_color`, `clear_depth` and `clear_stencil` are only
// consumed for attachments whose beginning access is CLEAR.
// The depth attachment uses the read-only depth view when m_current_depth_read_only is set.
void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_begin,
	D3D12_RENDER_PASS_ENDING_ACCESS_TYPE color_end, D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE depth_begin,
	D3D12_RENDER_PASS_ENDING_ACCESS_TYPE depth_end, D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE stencil_begin,
	D3D12_RENDER_PASS_ENDING_ACCESS_TYPE stencil_end, GSVector4 clear_color, float clear_depth, u8 clear_stencil)
{
	if (m_in_render_pass)
		EndRenderPass();

	// we're setting the RT here.
	m_dirty_flags &= ~DIRTY_FLAG_RENDER_TARGET;
	m_in_render_pass = true;

	if (stencil_end == D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD)
		GL_INS("D3D12: BeginRenderPass() end stencil is DISCARDED.");

	const bool has_color = (m_current_render_target != nullptr);
	const bool has_depth = (m_current_depth_target != nullptr);

	// Colour attachment.
	D3D12_RENDER_PASS_RENDER_TARGET_DESC color_desc = {};
	if (has_color)
	{
		color_desc.cpuDescriptor = m_current_render_target->GetWriteDescriptor();
		color_desc.BeginningAccess.Type = color_begin;
		color_desc.EndingAccess.Type = color_end;
		if (color_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR)
		{
			LookupNativeFormat(m_current_render_target->GetFormat(), nullptr,
				&color_desc.BeginningAccess.Clear.ClearValue.Format, nullptr, nullptr);
			GSVector4::store<false>(color_desc.BeginningAccess.Clear.ClearValue.Color, clear_color);
		}
	}

	// Depth/stencil attachment.
	D3D12_RENDER_PASS_DEPTH_STENCIL_DESC depth_desc = {};
	if (has_depth)
	{
		if (m_current_depth_read_only)
			depth_desc.cpuDescriptor = m_current_depth_target->GetReadDepthViewDescriptor();
		else
			depth_desc.cpuDescriptor = m_current_depth_target->GetWriteDescriptor();

		depth_desc.DepthBeginningAccess.Type = depth_begin;
		depth_desc.DepthEndingAccess.Type = depth_end;
		if (depth_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR)
		{
			LookupNativeFormat(m_current_depth_target->GetFormat(), nullptr, nullptr, nullptr,
				&depth_desc.DepthBeginningAccess.Clear.ClearValue.Format);
			depth_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = clear_depth;
		}

		depth_desc.StencilBeginningAccess.Type = stencil_begin;
		depth_desc.StencilEndingAccess.Type = stencil_end;
		if (stencil_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR)
		{
			LookupNativeFormat(m_current_depth_target->GetFormat(), nullptr, nullptr, nullptr,
				&depth_desc.StencilBeginningAccess.Clear.ClearValue.Format);
			depth_desc.StencilBeginningAccess.Clear.ClearValue.DepthStencil.Stencil = clear_stencil;
		}
	}

	D3D12_RENDER_PASS_FLAGS pass_flags = D3D12_RENDER_PASS_FLAG_NONE;
	if (has_depth && m_current_depth_read_only)
		pass_flags = D3D12_RENDER_PASS_FLAG_BIND_READ_ONLY_DEPTH;

	GetCommandList().list4->BeginRenderPass(
		has_color ? 1 : 0, has_color ? &color_desc : nullptr, has_depth ? &depth_desc : nullptr, pass_flags);
}
|
|
|
|
void GSDevice12::EndRenderPass()
|
|
{
|
|
if (!m_in_render_pass)
|
|
return;
|
|
|
|
m_in_render_pass = false;
|
|
|
|
// to render again, we need to reset OM
|
|
m_dirty_flags |= DIRTY_FLAG_RENDER_TARGET;
|
|
|
|
g_perfmon.Put(GSPerfMon::RenderPasses, 1);
|
|
|
|
GetCommandList().list4->EndRenderPass();
|
|
}
|
|
|
|
void GSDevice12::SetViewport(const D3D12_VIEWPORT& viewport)
|
|
{
|
|
if (std::memcmp(&viewport, &m_viewport, sizeof(m_viewport)) == 0)
|
|
return;
|
|
|
|
std::memcpy(&m_viewport, &viewport, sizeof(m_viewport));
|
|
m_dirty_flags |= DIRTY_FLAG_VIEWPORT;
|
|
}
|
|
|
|
void GSDevice12::SetScissor(const GSVector4i& scissor)
|
|
{
|
|
if (m_scissor.eq(scissor))
|
|
return;
|
|
|
|
m_scissor = scissor;
|
|
m_dirty_flags |= DIRTY_FLAG_SCISSOR;
|
|
}
|
|
|
|
void GSDevice12::SetPipeline(const ID3D12PipelineState* pipeline)
|
|
{
|
|
if (m_current_pipeline == pipeline)
|
|
return;
|
|
|
|
m_current_pipeline = pipeline;
|
|
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
|
|
}
|
|
|
|
// Records onto `cmdlist` every piece of root-signature-independent state whose bit is set in
// `flags`: vertex/index buffers, topology, pipeline, viewport, scissor, blend factor, stencil
// reference and the output-merger render targets.
__ri void GSDevice12::ApplyBaseState(u32 flags, ID3D12GraphicsCommandList* cmdlist)
{
	const auto is_dirty = [flags](u32 bit) { return (flags & bit) != 0; };

	if (is_dirty(DIRTY_FLAG_VERTEX_BUFFER))
		cmdlist->IASetVertexBuffers(0, 1, &m_vertex_buffer);

	if (is_dirty(DIRTY_FLAG_INDEX_BUFFER))
		cmdlist->IASetIndexBuffer(&m_index_buffer);

	if (is_dirty(DIRTY_FLAG_PRIMITIVE_TOPOLOGY))
		cmdlist->IASetPrimitiveTopology(m_primitive_topology);

	if (is_dirty(DIRTY_FLAG_PIPELINE))
		cmdlist->SetPipelineState(const_cast<ID3D12PipelineState*>(m_current_pipeline));

	if (is_dirty(DIRTY_FLAG_VIEWPORT))
		cmdlist->RSSetViewports(1, &m_viewport);

	if (is_dirty(DIRTY_FLAG_SCISSOR))
	{
		const D3D12_RECT scissor_rect{m_scissor.x, m_scissor.y, m_scissor.z, m_scissor.w};
		cmdlist->RSSetScissorRects(1, &scissor_rect);
	}

	if (is_dirty(DIRTY_FLAG_BLEND_CONSTANTS))
	{
		// The 8-bit constant is scaled by 1/128 and broadcast to all four channels.
		const GSVector4 blend_factor(static_cast<float>(m_blend_constant_color) / 128.0f);
		cmdlist->OMSetBlendFactor(blend_factor.v);
	}

	if (is_dirty(DIRTY_FLAG_STENCIL_REF))
		cmdlist->OMSetStencilRef(m_stencil_ref);

	if (is_dirty(DIRTY_FLAG_RENDER_TARGET))
	{
		// Pick the depth view once: read-only or writable, or none when there is no depth target.
		const D3D12_CPU_DESCRIPTOR_HANDLE* dsv = nullptr;
		if (m_current_depth_target)
		{
			dsv = m_current_depth_read_only ? &m_current_depth_target->GetReadDepthViewDescriptor().cpu_handle :
			                                  &m_current_depth_target->GetWriteDescriptor().cpu_handle;
		}

		if (m_current_render_target)
			cmdlist->OMSetRenderTargets(1, &m_current_render_target->GetWriteDescriptor().cpu_handle, FALSE, dsv);
		else if (m_current_depth_target)
			cmdlist->OMSetRenderTargets(0, nullptr, FALSE, dsv);
	}
}
|
|
|
|
bool GSDevice12::ApplyTFXState(bool already_execed)
|
|
{
|
|
if (m_current_root_signature == RootSignature::TFX && m_dirty_flags == 0)
|
|
return true;
|
|
|
|
u32 flags = m_dirty_flags;
|
|
m_dirty_flags &= ~(DIRTY_TFX_STATE | DIRTY_CONSTANT_BUFFER_STATE);
|
|
|
|
// do cbuffer first, because it's the most likely to cause an exec
|
|
if (flags & DIRTY_FLAG_VS_CONSTANT_BUFFER)
|
|
{
|
|
if (!m_vertex_constant_buffer.ReserveMemory(
|
|
sizeof(m_vs_cb_cache), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
|
|
{
|
|
if (already_execed)
|
|
{
|
|
Console.Error("D3D12: Failed to reserve vertex uniform space");
|
|
return false;
|
|
}
|
|
|
|
ExecuteCommandListAndRestartRenderPass(false, "Ran out of vertex uniform space");
|
|
return ApplyTFXState(true);
|
|
}
|
|
|
|
std::memcpy(m_vertex_constant_buffer.GetCurrentHostPointer(), &m_vs_cb_cache, sizeof(m_vs_cb_cache));
|
|
m_tfx_constant_buffers[0] = m_vertex_constant_buffer.GetCurrentGPUPointer();
|
|
m_vertex_constant_buffer.CommitMemory(sizeof(m_vs_cb_cache));
|
|
flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING;
|
|
}
|
|
|
|
if (flags & DIRTY_FLAG_PS_CONSTANT_BUFFER)
|
|
{
|
|
if (!m_pixel_constant_buffer.ReserveMemory(
|
|
sizeof(m_ps_cb_cache), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
|
|
{
|
|
if (already_execed)
|
|
{
|
|
Console.Error("D3D12: Failed to reserve pixel uniform space");
|
|
return false;
|
|
}
|
|
|
|
ExecuteCommandListAndRestartRenderPass(false, "Ran out of pixel uniform space");
|
|
return ApplyTFXState(true);
|
|
}
|
|
|
|
std::memcpy(m_pixel_constant_buffer.GetCurrentHostPointer(), &m_ps_cb_cache, sizeof(m_ps_cb_cache));
|
|
m_tfx_constant_buffers[1] = m_pixel_constant_buffer.GetCurrentGPUPointer();
|
|
m_pixel_constant_buffer.CommitMemory(sizeof(m_ps_cb_cache));
|
|
flags |= DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING;
|
|
}
|
|
|
|
if (flags & DIRTY_FLAG_TFX_SAMPLERS)
|
|
{
|
|
if (!GetSamplerAllocator().LookupSingle(&m_tfx_samplers_handle_gpu, m_tfx_sampler))
|
|
{
|
|
ExecuteCommandListAndRestartRenderPass(false, "Ran out of sampler groups");
|
|
return ApplyTFXState(true);
|
|
}
|
|
|
|
flags |= DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE;
|
|
}
|
|
|
|
if (flags & DIRTY_FLAG_TFX_TEXTURES)
|
|
{
|
|
if (!GetTextureGroupDescriptors(&m_tfx_textures_handle_gpu, m_tfx_textures.data(), 2))
|
|
{
|
|
ExecuteCommandListAndRestartRenderPass(false, "Ran out of TFX texture descriptor groups");
|
|
return ApplyTFXState(true);
|
|
}
|
|
|
|
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE;
|
|
}
|
|
|
|
if (flags & DIRTY_FLAG_TFX_RT_TEXTURES)
|
|
{
|
|
if (!GetTextureGroupDescriptors(&m_tfx_rt_textures_handle_gpu, m_tfx_textures.data() + 2, 2))
|
|
{
|
|
ExecuteCommandListAndRestartRenderPass(false, "Ran out of TFX RT descriptor descriptor groups");
|
|
return ApplyTFXState(true);
|
|
}
|
|
|
|
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2;
|
|
}
|
|
|
|
ID3D12GraphicsCommandList* cmdlist = GetCommandList().list4.get();
|
|
|
|
if (m_current_root_signature != RootSignature::TFX)
|
|
{
|
|
m_current_root_signature = RootSignature::TFX;
|
|
flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING | DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING |
|
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE |
|
|
DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2 | DIRTY_FLAG_PIPELINE;
|
|
cmdlist->SetGraphicsRootSignature(m_tfx_root_signature.get());
|
|
}
|
|
|
|
if (flags & DIRTY_FLAG_VS_CONSTANT_BUFFER_BINDING)
|
|
cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_VS_CBV, m_tfx_constant_buffers[0]);
|
|
if (flags & DIRTY_FLAG_PS_CONSTANT_BUFFER_BINDING)
|
|
cmdlist->SetGraphicsRootConstantBufferView(TFX_ROOT_SIGNATURE_PARAM_PS_CBV, m_tfx_constant_buffers[1]);
|
|
if (flags & DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING)
|
|
{
|
|
cmdlist->SetGraphicsRootShaderResourceView(TFX_ROOT_SIGNATURE_PARAM_VS_SRV,
|
|
m_vertex_stream_buffer.GetGPUPointer() + m_vertex.start * sizeof(GSVertex));
|
|
}
|
|
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE)
|
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_tfx_textures_handle_gpu);
|
|
if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE)
|
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_tfx_samplers_handle_gpu);
|
|
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2)
|
|
cmdlist->SetGraphicsRootDescriptorTable(TFX_ROOT_SIGNATURE_PARAM_PS_RT_TEXTURES, m_tfx_rt_textures_handle_gpu);
|
|
|
|
ApplyBaseState(flags, cmdlist);
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice12::ApplyUtilityState(bool already_execed)
|
|
{
|
|
if (m_current_root_signature == RootSignature::Utility && m_dirty_flags == 0)
|
|
return true;
|
|
|
|
u32 flags = m_dirty_flags;
|
|
m_dirty_flags &= ~DIRTY_UTILITY_STATE;
|
|
|
|
ID3D12GraphicsCommandList* cmdlist = GetCommandList().list4.get();
|
|
|
|
if (m_current_root_signature != RootSignature::Utility)
|
|
{
|
|
m_current_root_signature = RootSignature::Utility;
|
|
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_PIPELINE;
|
|
cmdlist->SetGraphicsRootSignature(m_utility_root_signature.get());
|
|
}
|
|
|
|
if (flags & DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE)
|
|
cmdlist->SetGraphicsRootDescriptorTable(UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES, m_utility_texture_gpu);
|
|
if (flags & DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE)
|
|
cmdlist->SetGraphicsRootDescriptorTable(UTILITY_ROOT_SIGNATURE_PARAM_PS_SAMPLERS, m_utility_sampler_gpu);
|
|
|
|
ApplyBaseState(flags, cmdlist);
|
|
return true;
|
|
}
|
|
|
|
// Updates the cached vertex shader constants; marks them dirty when Update() reports a change.
void GSDevice12::SetVSConstantBuffer(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	const bool changed = m_vs_cb_cache.Update(cb);
	if (!changed)
		return;

	m_dirty_flags |= DIRTY_FLAG_VS_CONSTANT_BUFFER;
}
|
|
|
|
// Updates the cached pixel shader constants; marks them dirty when Update() reports a change.
void GSDevice12::SetPSConstantBuffer(const GSHWDrawConfig::PSConstantBuffer& cb)
{
	const bool changed = m_ps_cb_cache.Update(cb);
	if (!changed)
		return;

	m_dirty_flags |= DIRTY_FLAG_PS_CONSTANT_BUFFER;
}
|
|
|
|
// Stencil-based destination alpha test setup.
// Draws a quad covering `bbox` into the depth/stencil target `ds` while sampling `rt`, using the
// convert pipeline selected by `datm`. The stencil is cleared to 0 at the start of the pass and
// depth is preserved. No colour target is bound.
void GSDevice12::SetupDATE(GSTexture* rt, GSTexture* ds, SetDATM datm, const GSVector4i& bbox)
{
	GL_PUSH("SetupDATE {%d,%d} %dx%d", bbox.left, bbox.top, bbox.width(), bbox.height());

	// Texture coordinates are the bbox normalised by the target size; positions are the same
	// rectangle mapped to [-1, 1] with Y flipped.
	const GSVector2i ds_size(ds->GetSize());
	const GSVector4 uv = GSVector4(bbox) / GSVector4(ds_size).xyxy();
	const GSVector4 ndc = uv * 2.0f - 1.0f;
	const GSVertexPT1 quad[] = {
		{GSVector4(ndc.x, -ndc.y, 0.5f, 1.0f), GSVector2(uv.x, uv.y)},
		{GSVector4(ndc.z, -ndc.y, 0.5f, 1.0f), GSVector2(uv.z, uv.y)},
		{GSVector4(ndc.x, -ndc.w, 0.5f, 1.0f), GSVector2(uv.x, uv.w)},
		{GSVector4(ndc.z, -ndc.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)},
	};

	// sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows
	EndRenderPass();
	SetUtilityTexture(rt, m_point_sampler_cpu);
	OMSetRenderTargets(nullptr, ds, bbox);
	IASetVertexBuffer(quad, sizeof(quad[0]), std::size(quad));
	SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	SetPipeline(m_convert[SetDATMShader(datm)].get());

	// Reference stencil value set on Create()
	BeginRenderPass(
		// colour: unused
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
		// depth: kept as-is
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
		// stencil: cleared, then kept for the following draw
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
		GSVector4::zero(), 0.0f, 0);

	if (ApplyUtilityState())
		DrawPrimitive();

	EndRenderPass();
}
|
|
|
|
// Primitive-ID based destination alpha test setup.
// Allocates a PrimID render target the size of config.rt, prefills it from the RT's existing alpha
// with a quad over config.drawarea, then runs a colour-less prepass of the draw into it. On return
// `pipe` and config.alpha_second_pass are switched to DATE=3 and the image is bound to texture
// slot 3.
// Returns the image, or nullptr if the render target could not be created.
GSTexture12* GSDevice12::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, PipelineSelector& pipe)
{
	// How this is done:
	// - can't put a barrier for the image in the middle of the normal render pass, so that's out
	// - so, instead of just filling the int texture with INT_MAX, we sample the RT and use -1 for failing values
	// - then, instead of sampling the RT with DATE=1/2, we just do a min() without it, the -1 gets preserved
	// - then, the DATE=3 draw is done as normal
	GL_INS("Setup DATE Primitive ID Image for {%d,%d}-{%d,%d}", config.drawarea.left, config.drawarea.top,
		config.drawarea.right, config.drawarea.bottom);

	const GSVector2i rt_size(config.rt->GetSize());
	GSTexture12* const primid_image =
		static_cast<GSTexture12*>(CreateRenderTarget(rt_size.x, rt_size.y, GSTexture::Format::PrimID, false));
	if (primid_image == nullptr)
		return nullptr;

	EndRenderPass();

	// setup the fill quad to prefill with existing alpha values
	SetUtilityTexture(config.rt, m_point_sampler_cpu);
	OMSetRenderTargets(primid_image, config.ds, config.drawarea);

	// if the depth target has been cleared, we need to preserve that clear
	const bool has_depth = (config.ds != nullptr);
	const float depth_clear_value = has_depth ? config.ds->GetClearDepth() : 0.0f;
	const D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE depth_begin =
		has_depth ? GetLoadOpForTexture(static_cast<GSTexture12*>(config.ds)) :
					D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS;
	const D3D12_RENDER_PASS_ENDING_ACCESS_TYPE depth_end =
		has_depth ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS;
	BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
		depth_begin, depth_end,
		D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
		GSVector4::zero(), depth_clear_value);

	// draw the quad to prefill the image
	const GSVector4 uv = GSVector4(config.drawarea) / GSVector4(rt_size).xyxy();
	const GSVector4 ndc = uv * 2.0f - 1.0f;
	const GSVertexPT1 quad[] = {
		{GSVector4(ndc.x, -ndc.y, 0.5f, 1.0f), GSVector2(uv.x, uv.y)},
		{GSVector4(ndc.z, -ndc.y, 0.5f, 1.0f), GSVector2(uv.z, uv.y)},
		{GSVector4(ndc.x, -ndc.w, 0.5f, 1.0f), GSVector2(uv.x, uv.w)},
		{GSVector4(ndc.z, -ndc.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)},
	};
	SetUtilityRootSignature();
	SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	SetPipeline(m_date_image_setup_pipelines[pipe.ds][static_cast<u8>(config.datm)].get());
	IASetVertexBuffer(quad, sizeof(quad[0]), std::size(quad));
	if (ApplyUtilityState())
		DrawPrimitive();

	// image is now filled with either -1 or INT_MAX, so now we can do the prepass
	SetPrimitiveTopology(s_primitive_topology_mapping[static_cast<u8>(config.topology)]);
	UploadHWDrawVerticesAndIndices(config);

	// cut down the configuration for the prepass, we don't need blending or any feedback loop
	PipelineSelector prepass_pipe(m_pipeline_selector);
	prepass_pipe.dss.zwe = false;
	prepass_pipe.cms.wrgba = 0;
	prepass_pipe.bs = {};
	prepass_pipe.rt = true;
	prepass_pipe.ps.blend_a = prepass_pipe.ps.blend_b = prepass_pipe.ps.blend_c = prepass_pipe.ps.blend_d = false;
	prepass_pipe.ps.no_color = false;
	prepass_pipe.ps.no_color1 = true;
	if (BindDrawPipeline(prepass_pipe))
		DrawIndexedPrimitive();

	// image is initialized/prepass is done, so finish up and get ready to do the "real" draw
	EndRenderPass();

	// .. by setting it to DATE=3
	pipe.ps.date = 3;
	config.alpha_second_pass.ps.date = 3;

	// and bind the image to the primitive sampler
	primid_image->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
	PSSetShaderResource(3, primid_image, false);
	return primid_image;
}
|
|
|
|
void GSDevice12::FeedbackBarrier(const GSTexture12* texture)
|
|
{
|
|
if (m_enhanced_barriers)
|
|
{
|
|
// Enhanced barriers allows for single resource feedback.
|
|
const D3D12_BARRIER_SYNC sync = D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING;
|
|
const D3D12_BARRIER_ACCESS access = D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
|
|
const D3D12_TEXTURE_BARRIER barrier = {sync, sync, access, access, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_LAYOUT_COMMON,
|
|
texture->GetResource(), {D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, 0, 0, 0, 0, 0}, D3D12_TEXTURE_BARRIER_FLAG_NONE};
|
|
const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_TEXTURE, .NumBarriers = 1, .pTextureBarriers = &barrier};
|
|
GetCommandList().list7->Barrier(1, &group);
|
|
}
|
|
else
|
|
{
|
|
// The DX12 spec notes "You may not read from, or consume, a write that occurred within the same render pass".
|
|
// The only exception being the implicit reads for render target blending or depth testing.
|
|
// Thus, in addition to a barrier, we need to end the render pass.
|
|
EndRenderPass();
|
|
// Specify null for the after resource as both resources are used after the barrier.
|
|
// While this may also be true before the barrier, we only write using the main resource.
|
|
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
|
|
barrier.Aliasing = {texture->GetResource(), nullptr};
|
|
GetCommandList().list4->ResourceBarrier(1, &barrier);
|
|
}
|
|
}
|
|
|
|
void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|
{
|
|
|
|
GSTexture12* colclip_rt = static_cast<GSTexture12*>(g_gs_device->GetColorClipTexture());
|
|
GSTexture12* draw_rt = static_cast<GSTexture12*>(config.rt);
|
|
GSTexture12* draw_ds = static_cast<GSTexture12*>(config.ds);
|
|
GSTexture12* draw_rt_clone = nullptr;
|
|
|
|
// Align the render area to 128x128, hopefully avoiding render pass restarts for small render area changes (e.g. Ratchet and Clank).
|
|
const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
|
|
|
|
PipelineSelector& pipe = m_pipeline_selector;
|
|
|
|
// figure out the pipeline
|
|
UpdateHWPipelineSelector(config);
|
|
|
|
// Handle RT hazard when no barrier was requested
|
|
if (m_features.texture_barrier && config.tex && (config.tex == config.rt) && !(config.require_one_barrier || config.require_full_barrier))
|
|
{
|
|
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
|
FeedbackBarrier(draw_rt);
|
|
}
|
|
|
|
// now blit the colclip texture back to the original target
|
|
if (colclip_rt)
|
|
{
|
|
if (config.colclip_mode == GSHWDrawConfig::ColClipMode::EarlyResolve)
|
|
{
|
|
GL_PUSH("Blit ColorClip back to RT");
|
|
|
|
EndRenderPass();
|
|
colclip_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
|
|
draw_rt = static_cast<GSTexture12*>(config.rt);
|
|
OMSetRenderTargets(draw_rt, draw_ds, config.colclip_update_area);
|
|
|
|
// if this target was cleared and never drawn to, perform the clear as part of the resolve here.
|
|
BeginRenderPass(GetLoadOpForTexture(draw_rt), D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
|
|
GetLoadOpForTexture(draw_ds),
|
|
draw_ds ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
draw_rt->GetUNormClearColor(), 0.0f, 0);
|
|
|
|
const GSVector4 sRect(GSVector4(config.colclip_update_area) / GSVector4(rtsize.x, rtsize.y).xyxy());
|
|
SetPipeline(m_colclip_finish_pipelines[pipe.ds].get());
|
|
SetUtilityTexture(colclip_rt, m_point_sampler_cpu);
|
|
DrawStretchRect(sRect, GSVector4(config.colclip_update_area), rtsize);
|
|
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
|
|
|
|
Recycle(colclip_rt);
|
|
g_gs_device->SetColorClipTexture(nullptr);
|
|
}
|
|
else
|
|
{
|
|
draw_rt = colclip_rt;
|
|
pipe.ps.colclip_hw = 1;
|
|
}
|
|
}
|
|
|
|
// Destination Alpha Setup
|
|
const bool need_barrier = m_features.texture_barrier && (config.require_one_barrier || config.require_full_barrier);
|
|
switch (config.destination_alpha)
|
|
{
|
|
case GSHWDrawConfig::DestinationAlphaMode::Off: // No setup
|
|
case GSHWDrawConfig::DestinationAlphaMode::Full: // No setup
|
|
case GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking: // Setup is done below
|
|
break;
|
|
case GSHWDrawConfig::DestinationAlphaMode::StencilOne: // setup is done below
|
|
{
|
|
// we only need to do the setup here if we don't have barriers, in which case do full DATE.
|
|
if (!need_barrier)
|
|
{
|
|
SetupDATE(draw_rt, config.ds, config.datm, config.drawarea);
|
|
config.destination_alpha = GSHWDrawConfig::DestinationAlphaMode::Stencil;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GSHWDrawConfig::DestinationAlphaMode::Stencil:
|
|
SetupDATE(draw_rt, config.ds, config.datm, config.drawarea);
|
|
break;
|
|
}
|
|
|
|
// stream buffer in first, in case we need to exec
|
|
SetVSConstantBuffer(config.cb_vs);
|
|
SetPSConstantBuffer(config.cb_ps);
|
|
|
|
// bind textures before checking the render pass, in case we need to transition them
|
|
if (config.tex)
|
|
{
|
|
PSSetShaderResource(0, config.tex, config.tex != config.rt && config.tex != config.ds);
|
|
PSSetSampler(config.sampler);
|
|
}
|
|
if (config.pal)
|
|
PSSetShaderResource(1, config.pal, true);
|
|
|
|
if (config.blend.constant_enable)
|
|
SetBlendConstants(config.blend.constant);
|
|
|
|
// Primitive ID tracking DATE setup.
|
|
GSTexture12* date_image = nullptr;
|
|
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
|
{
|
|
GSTexture* backup_rt = config.rt;
|
|
config.rt = draw_rt;
|
|
date_image = SetupPrimitiveTrackingDATE(config, pipe);
|
|
config.rt = backup_rt;
|
|
if (!date_image)
|
|
{
|
|
Console.Warning("D3D12: Failed to allocate DATE image, aborting draw.");
|
|
return;
|
|
}
|
|
}
|
|
else if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Depth)
|
|
{
|
|
PSSetShaderResource(2, draw_rt, true);
|
|
if ((draw_rt == m_current_render_target))
|
|
Console.Warning("DATE RT SAME");
|
|
else
|
|
Console.Warning("DATE RT NOT SAME");
|
|
|
|
draw_rt = (draw_rt == m_current_render_target) ? nullptr : m_current_render_target;
|
|
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, config.tex && config.tex == config.ds);
|
|
}
|
|
|
|
// Switch to colclip target for colclip hw rendering
|
|
if (pipe.ps.colclip_hw)
|
|
{
|
|
if (!colclip_rt)
|
|
{
|
|
config.colclip_update_area = config.drawarea;
|
|
|
|
EndRenderPass();
|
|
|
|
colclip_rt = static_cast<GSTexture12*>(CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::ColorClip, false));
|
|
if (!colclip_rt)
|
|
{
|
|
Console.Warning("D3D12: Failed to allocate ColorClip render target, aborting draw.");
|
|
|
|
if (date_image)
|
|
Recycle(date_image);
|
|
|
|
return;
|
|
}
|
|
|
|
g_gs_device->SetColorClipTexture(static_cast<GSTexture*>(colclip_rt));
|
|
|
|
// propagate clear value through if the colclip render is the first
|
|
if (draw_rt->GetState() == GSTexture::State::Cleared)
|
|
{
|
|
colclip_rt->SetState(GSTexture::State::Cleared);
|
|
colclip_rt->SetClearColor(draw_rt->GetClearColor());
|
|
}
|
|
else if (draw_rt->GetState() == GSTexture::State::Dirty)
|
|
{
|
|
GL_PUSH_("ColorClip Render Target Setup");
|
|
draw_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
}
|
|
|
|
// we're not drawing to the RT, so we can use it as a source
|
|
if (config.require_one_barrier && !m_features.texture_barrier)
|
|
PSSetShaderResource(2, draw_rt, true);
|
|
}
|
|
|
|
draw_rt = colclip_rt;
|
|
}
|
|
|
|
// Clear texture binding when it's bound to RT or DS.
|
|
if (!config.tex && ((draw_rt && static_cast<GSTexture12*>(draw_rt)->GetSRVDescriptor() == m_tfx_textures[0]) ||
|
|
(draw_ds && static_cast<GSTexture12*>(draw_ds)->GetSRVDescriptor() == m_tfx_textures[0])))
|
|
PSSetShaderResource(0, nullptr, false);
|
|
|
|
if (m_in_render_pass && (m_current_render_target == draw_rt || m_current_depth_target == draw_ds))
|
|
{
|
|
// avoid restarting the render pass just to switch from rt+depth to rt and vice versa
|
|
// keep the depth even if doing colclip hw draws, because the next draw will probably re-enable depth
|
|
if (!draw_rt && m_current_render_target && config.tex != m_current_render_target &&
|
|
m_current_render_target->GetSize() == draw_ds->GetSize())
|
|
{
|
|
draw_rt = m_current_render_target;
|
|
m_pipeline_selector.rt = true;
|
|
}
|
|
}
|
|
else if (!draw_ds && m_current_depth_target && config.tex != m_current_depth_target &&
|
|
m_current_depth_target->GetSize() == draw_rt->GetSize())
|
|
{
|
|
draw_ds = m_current_depth_target;
|
|
m_pipeline_selector.ds = true;
|
|
}
|
|
|
|
const bool feedback = draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.texture_barrier) || (config.tex && config.tex == config.rt));
|
|
if (feedback && !m_features.texture_barrier)
|
|
{
|
|
// Requires a copy of the RT.
|
|
draw_rt_clone = static_cast<GSTexture12*>(CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true));
|
|
if (draw_rt_clone)
|
|
{
|
|
GL_PUSH("D3D12: Copy RT to temp texture {%d,%d %dx%d}",
|
|
config.drawarea.left, config.drawarea.top,
|
|
config.drawarea.width(), config.drawarea.height());
|
|
EndRenderPass();
|
|
CopyRect(draw_rt, draw_rt_clone, config.drawarea, config.drawarea.left, config.drawarea.top);
|
|
if (config.require_one_barrier)
|
|
PSSetShaderResource(2, draw_rt_clone, true);
|
|
if (config.tex && config.tex == config.rt)
|
|
PSSetShaderResource(0, draw_rt_clone, true);
|
|
}
|
|
else
|
|
Console.Warning("D3D12: Failed to allocate temp texture for RT copy.");
|
|
}
|
|
|
|
// For depth testing and sampling, use a read only dsv, otherwise use a write dsv
|
|
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, config.tex && config.tex == config.ds);
|
|
|
|
// DX12 equivalent of vkCmdClearAttachments for StencilOne
|
|
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne)
|
|
{
|
|
EndRenderPass();
|
|
// Make sure the DSV is in writeable state
|
|
draw_ds->TransitionToState(GSTexture12::ResourceState::DepthWriteStencil);
|
|
|
|
D3D12_RECT rect = {config.drawarea.left, config.drawarea.top, config.drawarea.left + config.drawarea.width(), config.drawarea.top + config.drawarea.height()};
|
|
GetCommandList().list4->ClearDepthStencilView(draw_ds->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 0.0f, 1, 1, &rect);
|
|
}
|
|
|
|
// Begin render pass if new target or out of the area.
|
|
if (!m_in_render_pass)
|
|
{
|
|
GSVector4 clear_color = draw_rt ? draw_rt->GetUNormClearColor() : GSVector4::zero();
|
|
if (pipe.ps.colclip_hw)
|
|
{
|
|
// Denormalize clear color for hw colclip.
|
|
clear_color *= GSVector4::cxpr(255.0f / 65535.0f, 255.0f / 65535.0f, 255.0f / 65535.0f, 1.0f);
|
|
}
|
|
|
|
const bool stencil_DATE = config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Stencil ||
|
|
config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne;
|
|
|
|
BeginRenderPass(GetLoadOpForTexture(draw_rt),
|
|
draw_rt ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
GetLoadOpForTexture(draw_ds),
|
|
draw_ds ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
stencil_DATE ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE :
|
|
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS,
|
|
stencil_DATE ? (need_barrier ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
|
|
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD) :
|
|
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
clear_color, draw_ds ? draw_ds->GetClearDepth() : 0.0f, 1);
|
|
}
|
|
|
|
// rt -> colclip hw blit if enabled
|
|
if (colclip_rt && (config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertOnly || config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertAndResolve) && config.rt->GetState() == GSTexture::State::Dirty)
|
|
{
|
|
OMSetRenderTargets(draw_rt, draw_ds, GSVector4i::loadh(rtsize));
|
|
SetUtilityTexture(static_cast<GSTexture12*>(config.rt), m_point_sampler_cpu);
|
|
SetPipeline(m_colclip_setup_pipelines[pipe.ds].get());
|
|
|
|
const GSVector4 drawareaf = GSVector4((config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertOnly) ? GSVector4i::loadh(rtsize) : config.drawarea);
|
|
const GSVector4 sRect(drawareaf / GSVector4(rtsize.x, rtsize.y).xyxy());
|
|
DrawStretchRect(sRect, GSVector4(drawareaf), rtsize);
|
|
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
|
|
|
|
GL_POP();
|
|
|
|
// Restore original scissor, not sure if needed since the render pass has already been started. But to be safe.
|
|
OMSetRenderTargets(draw_rt, draw_ds, config.scissor);
|
|
}
|
|
// VB/IB upload, if we did DATE setup and it's not colclip hw this has already been done
|
|
SetPrimitiveTopology(s_primitive_topology_mapping[static_cast<u8>(config.topology)]);
|
|
if (!date_image || colclip_rt)
|
|
UploadHWDrawVerticesAndIndices(config);
|
|
|
|
// now we can do the actual draw
|
|
SendHWDraw(pipe, config, draw_rt, feedback, config.require_one_barrier, config.require_full_barrier);
|
|
|
|
// blend second pass
|
|
if (config.blend_multi_pass.enable)
|
|
{
|
|
if (config.blend_multi_pass.blend.constant_enable)
|
|
SetBlendConstants(config.blend_multi_pass.blend.constant);
|
|
|
|
pipe.bs = config.blend_multi_pass.blend;
|
|
pipe.ps.no_color1 = config.blend_multi_pass.no_color1;
|
|
pipe.ps.blend_hw = config.blend_multi_pass.blend_hw;
|
|
pipe.ps.dither = config.blend_multi_pass.dither;
|
|
if (BindDrawPipeline(pipe))
|
|
DrawIndexedPrimitive();
|
|
}
|
|
|
|
// and the alpha pass
|
|
if (config.alpha_second_pass.enable)
|
|
{
|
|
// cbuffer will definitely be dirty if aref changes, no need to check it
|
|
if (config.cb_ps.FogColor_AREF.a != config.alpha_second_pass.ps_aref)
|
|
{
|
|
config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref;
|
|
SetPSConstantBuffer(config.cb_ps);
|
|
}
|
|
|
|
pipe.ps = config.alpha_second_pass.ps;
|
|
pipe.cms = config.alpha_second_pass.colormask;
|
|
pipe.dss = config.alpha_second_pass.depth;
|
|
pipe.bs = config.blend;
|
|
SendHWDraw(pipe, config, draw_rt, feedback, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier);
|
|
}
|
|
|
|
if (date_image)
|
|
Recycle(date_image);
|
|
|
|
if (draw_rt_clone)
|
|
Recycle(draw_rt_clone);
|
|
|
|
// now blit the colclip texture back to the original target
|
|
if (colclip_rt)
|
|
{
|
|
config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);
|
|
|
|
if ((config.colclip_mode == GSHWDrawConfig::ColClipMode::ResolveOnly || config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertAndResolve))
|
|
{
|
|
GL_PUSH("Blit ColorClip back to RT");
|
|
|
|
EndRenderPass();
|
|
colclip_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
|
|
|
|
draw_rt = static_cast<GSTexture12*>(config.rt);
|
|
OMSetRenderTargets(draw_rt, draw_ds, config.colclip_update_area);
|
|
|
|
// if this target was cleared and never drawn to, perform the clear as part of the resolve here.
|
|
BeginRenderPass(GetLoadOpForTexture(draw_rt), D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE,
|
|
GetLoadOpForTexture(draw_ds),
|
|
draw_ds ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS,
|
|
draw_rt->GetUNormClearColor(), 0.0f, 0);
|
|
|
|
const GSVector4 sRect(GSVector4(config.colclip_update_area) / GSVector4(rtsize.x, rtsize.y).xyxy());
|
|
SetPipeline(m_colclip_finish_pipelines[pipe.ds].get());
|
|
SetUtilityTexture(colclip_rt, m_point_sampler_cpu);
|
|
DrawStretchRect(sRect, GSVector4(config.colclip_update_area), rtsize);
|
|
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
|
|
|
|
Recycle(colclip_rt);
|
|
g_gs_device->SetColorClipTexture(nullptr);
|
|
}
|
|
}
|
|
}
|
|
|
|
void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig& config, GSTexture12* draw_rt, const bool feedback, const bool one_barrier, const bool full_barrier)
|
|
{
|
|
if (BindDrawPipeline(pipe) && !m_features.texture_barrier) [[unlikely]]
|
|
{
|
|
DrawIndexedPrimitive();
|
|
return;
|
|
}
|
|
|
|
if (feedback)
|
|
{
|
|
#ifdef PCSX2_DEVBUILD
|
|
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
|
Console.Warning("D3D12: Possible unnecessary barrier detected.");
|
|
#endif
|
|
if (one_barrier || full_barrier)
|
|
PSSetShaderResource(2, draw_rt, false, true);
|
|
if (config.tex && config.tex == config.rt)
|
|
PSSetShaderResource(0, draw_rt, false, true);
|
|
|
|
if (full_barrier)
|
|
{
|
|
pxAssert(config.drawlist && !config.drawlist->empty());
|
|
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
|
const u32 indices_per_prim = config.indices_per_prim;
|
|
|
|
GL_PUSH("Split the draw");
|
|
g_perfmon.Put(GSPerfMon::Barriers, draw_list_size);
|
|
|
|
for (u32 n = 0, p = 0; n < draw_list_size; n++)
|
|
{
|
|
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
|
|
|
FeedbackBarrier(draw_rt);
|
|
|
|
if (BindDrawPipeline(pipe))
|
|
DrawIndexedPrimitive(p, count);
|
|
p += count;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (one_barrier)
|
|
{
|
|
g_perfmon.Put(GSPerfMon::Barriers, 1);
|
|
|
|
FeedbackBarrier(draw_rt);
|
|
}
|
|
}
|
|
|
|
if (BindDrawPipeline(pipe))
|
|
DrawIndexedPrimitive();
|
|
}
|
|
|
|
// Refreshes m_pipeline_selector from the state keys carried by the given draw config.
void GSDevice12::UpdateHWPipelineSelector(GSHWDrawConfig& config)
{
	auto& selector = m_pipeline_selector;

	// Shader selectors.
	selector.vs.key = config.vs.key;
	selector.ps.key_hi = config.ps.key_hi;
	selector.ps.key_lo = config.ps.key_lo;

	// Fixed-function state.
	selector.dss.key = config.depth.key;
	selector.bs.key = config.blend.key;
	// Zeroed after the key copy so pipelines are not duplicated per alpha value.
	selector.bs.constant = 0;
	selector.cms.key = config.colormask.key;
	selector.topology = static_cast<u32>(config.topology);

	// Which attachments this draw has.
	selector.rt = (config.rt != nullptr);
	selector.ds = (config.ds != nullptr);
}
|
|
|
|
void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
|
{
|
|
IASetVertexBuffer(config.verts, sizeof(GSVertex), config.nverts);
|
|
|
|
// Update SRV in root signature directly, rather than using a uniform for base vertex.
|
|
if (config.vs.expand != GSHWDrawConfig::VSExpand::None)
|
|
m_dirty_flags |= DIRTY_FLAG_VS_VERTEX_BUFFER_BINDING;
|
|
|
|
if (config.vs.UseExpandIndexBuffer())
|
|
{
|
|
m_index.start = 0;
|
|
m_index.count = config.nindices;
|
|
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R16_UINT);
|
|
}
|
|
else
|
|
{
|
|
IASetIndexBuffer(config.indices, config.nindices);
|
|
}
|
|
}
|