Files
archived-pcsx2/pcsx2/GS/Renderers/DX11/GSDevice11.cpp
lightningterror 4aa49a917c 2
2026-01-27 00:17:44 +01:00

2881 lines
87 KiB
C++

// SPDX-FileCopyrightText: 2002-2026 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
#include "GS.h"
#include "GS/GSGL.h"
#include "GSDevice11.h"
#include "GS/Renderers/DX11/D3D.h"
#include "GS/GSExtra.h"
#include "GS/GSPerfMon.h"
#include "GS/GSUtil.h"
#include "Host.h"
#include "common/BitUtils.h"
#include "common/Error.h"
#include "common/Path.h"
#include "common/StringUtil.h"
#include "imgui.h"
#include "IconsFontAwesome.h"
#include <bit>
#include <fstream>
#include <sstream>
#include <VersionHelpers.h>
#include <d3dcompiler.h>
#include <dxgidebug.h>
// #define REPORT_LEAKED_OBJECTS 1
// Color passed to ClearRenderTargetView() for the swap chain back buffer.
// Value-initialized, so all four components are 0.0f.
static constexpr std::array<float, 4> s_present_clear_color = {};
// Returns true when the device reports Texture2D support for the given format.
// A failed CheckFormatSupport() query is treated the same as "not supported".
static bool SupportsTextureFormat(ID3D11Device* dev, DXGI_FORMAT format)
{
	UINT caps;
	const bool query_failed = FAILED(dev->CheckFormatSupport(format, &caps));

	// caps is only inspected when the query succeeded.
	return !query_failed && (caps & D3D11_FORMAT_SUPPORT_TEXTURE2D) != 0;
}
// Resets the cached pipeline state and records the feature flags this backend
// starts out with. Several of these (dxt_textures, bptc_textures, cas_sharpening)
// are assigned again by SetFeatures() once a device exists.
GSDevice11::GSDevice11()
{
	// Zero the whole state cache, then patch the fields whose "nothing set" value is not zero.
	memset(&m_state, 0, sizeof(m_state));
	m_state.bf = -1;
	m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;

	// Features reported as available.
	m_features.primitive_id = true;
	m_features.stencil_buffer = true;
	m_features.cas_sharpening = true;
	m_features.test_and_sample_depth = true;

	// Driven by user configuration.
	m_features.multidraw_fb_copy = GSConfig.OverrideTextureBarriers != 0;

	// Features reported as unavailable.
	m_features.texture_barrier = false;
	m_features.provoking_vertex_last = false;
	m_features.point_expand = false;
	m_features.line_expand = false;
	m_features.prefer_new_textures = false;
	m_features.dxt_textures = false;
	m_features.bptc_textures = false;
	m_features.framebuffer_fetch = false;
}
// Defaulted destructor: no destructor-specific logic lives here. Explicit teardown of
// device objects is performed by Destroy().
GSDevice11::~GSDevice11() = default;
// Attaches a debug name to a D3D11 object through its private data, unless the object
// already carries one. Compiles to a no-op outside PCSX2_DEVBUILD builds.
void GSDevice11::SetD3DDebugObjectName(ID3D11DeviceChild* obj, std::string_view name)
{
#ifdef PCSX2_DEVBUILD
	// WKPDID_D3DDebugObjectName
	static constexpr GUID guid = {0x429b8c22, 0x9188, 0x4b0c, {0x87, 0x42, 0xac, 0xb0, 0xbf, 0x85, 0xc2, 0x00}};

	// Passing a null data pointer only queries the size of any existing entry.
	UINT current_size;
	const HRESULT query_hr = obj->GetPrivateData(guid, &current_size, nullptr);
	const bool already_named = SUCCEEDED(query_hr) && current_size > 0;

	// Do not overwrite a name that has already been set.
	if (!already_named)
		obj->SetPrivateData(guid, static_cast<UINT>(name.length()), name.data());
#endif
}
// Identifies this backend as the Direct3D 11 renderer.
RenderAPI GSDevice11::GetRenderAPI() const
{
return RenderAPI::D3D11;
}
// Creates the D3D11 device and immediate context, the swap chain (unless the window is
// surfaceless), and the fixed set of shaders, state objects and buffers used by this backend.
//
// Returns false when the base-class Create(), device creation, window/swap chain setup,
// shader loading/compilation, or one of the explicitly checked buffer creations fails.
//
// NOTE(review): several CreateBuffer/Create*State calls below ignore their HRESULT (they are
// marked individually); a failure there leaves the corresponding member null without failing
// Create(). Confirm whether that is intended.
bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
{
if (!GSDevice::Create(vsync_mode, allow_present_throttle))
return false;
// Request the D3D debug layer only when the debug device option is enabled.
UINT create_flags = 0;
if (GSConfig.UseDebugDevice)
create_flags |= D3D11_CREATE_DEVICE_DEBUG;
m_dxgi_factory = D3D::CreateFactory(GSConfig.UseDebugDevice);
if (!m_dxgi_factory)
return false;
// Adapter chosen in the configuration. When this is null, the device is created with
// D3D_DRIVER_TYPE_HARDWARE instead of an explicit adapter (see the call below).
wil::com_ptr_nothrow<IDXGIAdapter1> dxgi_adapter = D3D::GetAdapterByName(m_dxgi_factory.get(), GSConfig.Adapter);
// Feature levels passed to D3D11CreateDevice: 11.0 first, then 10.0.
static constexpr std::array<D3D_FEATURE_LEVEL, 2> requested_feature_levels = {{
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_10_0,
}};
// The device/context are created as base interfaces, then queried into m_dev/m_ctx.
wil::com_ptr_nothrow<ID3D11Device> temp_dev;
wil::com_ptr_nothrow<ID3D11DeviceContext> temp_ctx;
HRESULT hr =
D3D11CreateDevice(dxgi_adapter.get(), dxgi_adapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE,
nullptr, create_flags, requested_feature_levels.data(), static_cast<UINT>(requested_feature_levels.size()),
D3D11_SDK_VERSION, temp_dev.put(), &m_feature_level, temp_ctx.put());
// NOTE(review): if creation succeeds but one of the interface queries fails, the message
// below reports the (successful) hr from D3D11CreateDevice.
if (FAILED(hr) || !temp_dev.try_query_to(&m_dev) || !temp_ctx.try_query_to(&m_ctx))
{
Host::ReportErrorAsync("GS",
fmt::format(
TRANSLATE_FS("GS", "Failed to create D3D11 device: 0x{:08X}. A GPU which supports Direct3D Feature Level 10.0 is required."),
hr));
return false;
}
// we re-grab these later, see below
dxgi_adapter.reset();
temp_dev.reset();
temp_ctx.reset();
// With the debug device and a debugger attached, break on errors/warnings and filter out
// a handful of known-noisy messages.
if (GSConfig.UseDebugDevice && IsDebuggerPresent())
{
wil::com_ptr_nothrow<ID3D11InfoQueue> info;
if (m_dev.try_query_to(&info))
{
info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_ERROR, TRUE);
info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_WARNING, TRUE);
// Silence some annoying harmless warnings.
D3D11_MESSAGE_ID hide[] = {
D3D11_MESSAGE_ID_DEVICE_OMSETRENDERTARGETS_HAZARD,
D3D11_MESSAGE_ID_DEVICE_PSSETSHADERRESOURCES_HAZARD,
D3D11_MESSAGE_ID_DEVICE_DRAW_RENDERTARGETVIEW_NOT_SET,
D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS,
};
D3D11_INFO_QUEUE_FILTER filter = {};
filter.DenyList.NumIDs = std::size(hide);
filter.DenyList.pIDList = hide;
info->AddStorageFilterEntries(&filter);
}
}
// Re-acquire the adapter from the created device (this repopulates dxgi_adapter) and log its name.
wil::com_ptr_nothrow<IDXGIDevice> dxgi_device;
if (m_dev.try_query_to(&dxgi_device) && SUCCEEDED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.put()))))
{
m_name = D3D::GetAdapterName(dxgi_adapter.get());
Console.WriteLn(fmt::format("D3D11: Adapter: {}", m_name));
}
else
Console.Error("D3D11: Failed to obtain adapter name.");
// Tearing is treated as supported only when the query succeeds and reports TRUE.
BOOL allow_tearing_supported = false;
hr = m_dxgi_factory->CheckFeatureSupport(
DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported));
m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
// A swap chain is only created when the acquired window is not surfaceless.
if (!AcquireWindow(true) || (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain()))
return false;
// Descriptor structs reused (and re-zeroed) by the object creation code below.
D3D11_BUFFER_DESC bd;
D3D11_SAMPLER_DESC sd;
D3D11_DEPTH_STENCIL_DESC dsd;
D3D11_RASTERIZER_DESC rd;
D3D11_BLEND_DESC bsd;
// Debug annotations are only requested together with the debug device.
if (GSConfig.UseDebugDevice)
m_annotation = m_ctx.try_query<ID3DUserDefinedAnnotation>();
// Failing to open the shader cache is only a warning; creation continues.
if (!m_shader_cache.Open(m_feature_level, GSConfig.UseDebugDevice))
Console.Warning("D3D11: Shader cache failed to open.");
{
// HACK: check AMD
// Broken point sampler should be enabled only on AMD.
// These locals shadow the function-scope dxgi_device/dxgi_adapter for the duration of this block.
wil::com_ptr_nothrow<IDXGIDevice> dxgi_device;
wil::com_ptr_nothrow<IDXGIAdapter1> dxgi_adapter;
if (SUCCEEDED(m_dev->QueryInterface(dxgi_device.put())) &&
SUCCEEDED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.put()))))
{
m_features.broken_point_sampler = (D3D::GetVendorID(dxgi_adapter.get()) == D3D::VendorID::AMD);
}
}
// Uses the function-scope dxgi_adapter re-acquired above; it is null if that query failed.
SetFeatures(dxgi_adapter.get());
// The TFX shader source is only loaded and stored here, not compiled.
std::optional<std::string> shader = ReadShaderSource("shaders/dx11/tfx.fx");
if (!shader.has_value())
return false;
m_tfx_source = std::move(*shader);
// convert
// This input layout is shared by the convert and present vertex shaders below.
D3D11_INPUT_ELEMENT_DESC il_convert[] =
{
{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
};
// convert_hlsl is kept for the whole function; it is used again for the primid init shaders.
const std::optional<std::string> convert_hlsl = ReadShaderSource("shaders/dx11/convert.fx");
if (!convert_hlsl.has_value())
return false;
if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_convert.vs.put(), m_convert.il.put(),
il_convert, std::size(il_convert), *convert_hlsl, nullptr, "vs_main"))
{
return false;
}
// One pixel shader per ShaderConvert value; the entry point name comes from shaderName().
for (size_t i = 0; i < std::size(m_convert.ps); i++)
{
m_convert.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, nullptr, shaderName(static_cast<ShaderConvert>(i)));
if (!m_convert.ps[i])
return false;
}
// present
shader = ReadShaderSource("shaders/dx11/present.fx");
if (!shader.has_value())
return false;
if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_present.vs.put(), m_present.il.put(),
il_convert, std::size(il_convert), *shader, nullptr, "vs_main"))
{
return false;
}
// One pixel shader per PresentShader value.
for (size_t i = 0; i < std::size(m_present.ps); i++)
{
m_present.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, shaderName(static_cast<PresentShader>(i)));
if (!m_present.ps[i])
return false;
}
// Constant buffer for the present pixel shaders.
// NOTE(review): the CreateBuffer result is not checked here.
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(DisplayConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
m_dev->CreateBuffer(&bd, nullptr, m_present.ps_cb.put());
// Two depth-stencil states for conversion: one from an all-zero descriptor, and one with
// depth enabled, always passing and writing depth.
// NOTE(review): neither CreateDepthStencilState result is checked.
memset(&dsd, 0, sizeof(dsd));
m_dev->CreateDepthStencilState(&dsd, m_convert.dss.put());
dsd.DepthEnable = true;
dsd.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
dsd.DepthFunc = D3D11_COMPARISON_ALWAYS;
m_dev->CreateDepthStencilState(&dsd, m_convert.dss_write.put());
// One blend state per index of m_convert.bs, using the index itself as the render target
// write mask; everything else in the descriptor stays zero.
// NOTE(review): the CreateBlendState results are not checked.
memset(&bsd, 0, sizeof(bsd));
for (u32 i = 0; i < static_cast<u32>(m_convert.bs.size()); i++)
{
bsd.RenderTarget[0].RenderTargetWriteMask = static_cast<u8>(i);
m_dev->CreateBlendState(&bsd, m_convert.bs[i].put());
}
// merge
// NOTE(review): the CreateBuffer result is not checked here.
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(MergeConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
m_dev->CreateBuffer(&bd, nullptr, m_merge.cb.put());
shader = ReadShaderSource("shaders/dx11/merge.fx");
if (!shader.has_value())
return false;
// Entry points are named ps_main0, ps_main1, ... one per element of m_merge.ps.
for (size_t i = 0; i < std::size(m_merge.ps); i++)
{
const std::string entry_point(StringUtil::StdStringFromFormat("ps_main%zu", i));
m_merge.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, entry_point.c_str());
if (!m_merge.ps[i])
return false;
}
// Source-alpha blend for color; the alpha channel takes the source value (ONE/ZERO).
// NOTE(review): the CreateBlendState result is not checked.
memset(&bsd, 0, sizeof(bsd));
bsd.RenderTarget[0].BlendEnable = true;
bsd.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
bsd.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
bsd.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
bsd.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
bsd.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bsd.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bsd.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
m_dev->CreateBlendState(&bsd, m_merge.bs.put());
// interlace
// NOTE(review): the CreateBuffer result is not checked here.
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(InterlaceConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
m_dev->CreateBuffer(&bd, nullptr, m_interlace.cb.put());
shader = ReadShaderSource("shaders/dx11/interlace.fx");
if (!shader.has_value())
return false;
// Entry points are named ps_main0, ps_main1, ... one per element of m_interlace.ps.
for (size_t i = 0; i < std::size(m_interlace.ps); i++)
{
const std::string entry_point(StringUtil::StdStringFromFormat("ps_main%zu", i));
m_interlace.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, entry_point.c_str());
if (!m_interlace.ps[i])
return false;
}
// Shade Boost
// The constant buffer holds four floats.
// NOTE(review): the CreateBuffer result is not checked here.
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(float) * 4;
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
m_dev->CreateBuffer(&bd, nullptr, m_shadeboost.cb.put());
shader = ReadShaderSource("shaders/dx11/shadeboost.fx");
if (!shader.has_value())
return false;
m_shadeboost.ps = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, "ps_main");
if (!m_shadeboost.ps)
return false;
// Vertex/Index Buffer
// Both are dynamic, CPU-writable buffers; the same descriptor is reused for the index buffer.
bd = {};
bd.ByteWidth = VERTEX_BUFFER_SIZE;
bd.Usage = D3D11_USAGE_DYNAMIC;
bd.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_vb.put())))
{
Console.Error("D3D11: Failed to create vertex buffer.");
return false;
}
bd.ByteWidth = INDEX_BUFFER_SIZE;
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_ib.put())))
{
Console.Error("D3D11: Failed to create index buffer.");
return false;
}
IASetIndexBuffer(m_ib.get());
// Vertex shader expansion: a structured buffer of GSVertex exposed to the vertex shader
// through an SRV in slot 0, plus an index buffer filled by GenerateExpansionIndexBuffer().
// bd still carries the dynamic/CPU-write settings from the buffers above.
if (m_features.vs_expand)
{
bd.ByteWidth = VERTEX_BUFFER_SIZE;
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
bd.StructureByteStride = sizeof(GSVertex);
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_expand_vb.put())))
{
Console.Error("D3D11: Failed to create expand vertex buffer.");
return false;
}
const CD3D11_SHADER_RESOURCE_VIEW_DESC vb_srv_desc(
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, VERTEX_BUFFER_SIZE / sizeof(GSVertex));
if (FAILED(m_dev->CreateShaderResourceView(m_expand_vb.get(), &vb_srv_desc, m_expand_vb_srv.put())))
{
Console.Error("D3D11: Failed to create expand vertex buffer SRV.");
return false;
}
m_ctx->VSSetShaderResources(0, 1, m_expand_vb_srv.addressof());
bd.ByteWidth = EXPAND_BUFFER_SIZE;
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
bd.StructureByteStride = 0;
bd.MiscFlags = 0;
// The index data is generated on the CPU and uploaded as the buffer's initial contents.
std::unique_ptr<u8[]> expand_data = std::make_unique<u8[]>(EXPAND_BUFFER_SIZE);
GenerateExpansionIndexBuffer(expand_data.get());
const D3D11_SUBRESOURCE_DATA srd = {expand_data.get()};
if (FAILED(m_dev->CreateBuffer(&bd, &srd, m_expand_ib.put())))
{
Console.Error("D3D11: Failed to create expand index buffer.");
return false;
}
}
// rasterizer
// Solid fill, no culling, scissor test enabled; the state is bound immediately.
// NOTE(review): the CreateRasterizerState result is not checked.
memset(&rd, 0, sizeof(rd));
rd.FillMode = D3D11_FILL_SOLID;
rd.CullMode = D3D11_CULL_NONE;
rd.FrontCounterClockwise = false;
rd.DepthBias = false;
rd.DepthBiasClamp = 0;
rd.SlopeScaledDepthBias = 0;
rd.DepthClipEnable = false; // ???
rd.ScissorEnable = true;
rd.MultisampleEnable = false;
rd.AntialiasedLineEnable = false;
m_dev->CreateRasterizerState(&rd, m_rs.put());
m_ctx->RSSetState(m_rs.get());
// sampler
// Two clamp-addressed samplers that differ only in filter: linear (ln) and point (pt).
// NOTE(review): the CreateSamplerState results are not checked.
memset(&sd, 0, sizeof(sd));
sd.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
sd.MinLOD = -FLT_MAX;
sd.MaxLOD = FLT_MAX;
sd.MaxAnisotropy = 1;
sd.ComparisonFunc = D3D11_COMPARISON_NEVER;
m_dev->CreateSamplerState(&sd, m_convert.ln.put());
sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
m_dev->CreateSamplerState(&sd, m_convert.pt.put());
// constant buffer
// Vertex and pixel shader constant buffers for hardware draws; both creations are checked.
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(GSHWDrawConfig::VSConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_vs_cb.put())))
{
Console.Error("D3D11: Failed to create vertex shader constant buffer.");
return false;
}
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(GSHWDrawConfig::PSConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_ps_cb.put())))
{
Console.Error("D3D11: Failed to create pixel shader constant buffer.");
return false;
}
// create layout
// Calls SetupVS() once with a default-constructed selector and constant buffer.
{
const VSSelector sel;
const GSHWDrawConfig::VSConstantBuffer cb;
SetupVS(sel, &cb);
}
// depth stencil
// State stored in m_date: depth disabled, stencil always passes and replaces, with read and
// write masks of 1.
// NOTE(review): the CreateDepthStencilState result is not checked.
memset(&dsd, 0, sizeof(dsd));
dsd.DepthEnable = false;
dsd.StencilEnable = true;
dsd.StencilReadMask = 1;
dsd.StencilWriteMask = 1;
dsd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
dsd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE;
dsd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
dsd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
dsd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
dsd.BackFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE;
dsd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
dsd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
m_dev->CreateDepthStencilState(&dsd, m_date.dss.put());
// blend
// Created from an all-zero descriptor (so the render target write mask is 0).
// NOTE(review): the CreateBlendState result is not checked.
{
D3D11_BLEND_DESC blend;
memset(&blend, 0, sizeof(blend));
m_dev->CreateBlendState(&blend, m_date.bs.put());
}
// The primid init pixel shaders are compiled from the convert shader source loaded earlier.
for (size_t i = 0; i < std::size(m_date.primid_init_ps); i++)
{
const std::string entry_point(StringUtil::StdStringFromFormat("ps_stencil_image_init_%zu", i));
m_date.primid_init_ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, nullptr, entry_point.c_str());
if (!m_date.primid_init_ps[i])
return false;
}
// CAS shaders are only created when the feature is enabled (see SetFeatures()).
if (m_features.cas_sharpening && !CreateCASShaders())
return false;
if (!CreateImGuiResources())
return false;
// Creation still succeeds below feature level 11.0, but the user is warned on screen.
if (m_feature_level < D3D_FEATURE_LEVEL_11_0)
{
Host::AddIconOSDMessage("d3d11_feature_level_warning", ICON_FA_TRIANGLE_EXCLAMATION,
TRANSLATE_SV("GS", "The Direct3D 11 renderer is running at feature level 10.0. This is an UNSUPPORTED configuration.\n"
"Do not request support, please upgrade your hardware/drivers first."),
Host::OSD_WARNING_DURATION);
}
return true;
}
// Tears the device down: base-class cleanup first, then the swap chain, timestamp queries,
// shader/state groups, buffers, cached views and shader cache, and finally the annotation
// interface, context, device and DXGI factory.
void GSDevice11::Destroy()
{
GSDevice::Destroy();
DestroySwapChain();
DestroyTimestampQueries();
// Reset each group of shaders/state objects to its default-constructed (empty) value.
m_convert = {};
m_present = {};
m_merge = {};
m_interlace = {};
m_shadeboost = {};
m_date = {};
m_cas = {};
m_imgui = {};
// Geometry buffers, including the vertex-shader-expand resources.
m_vb.reset();
m_ib.reset();
m_expand_vb_srv.reset();
m_expand_vb.reset();
m_expand_ib.reset();
// Cached shaders, samplers, depth-stencil/blend states and their constant buffers.
m_vs.clear();
m_vs_cb.reset();
m_gs.clear();
m_ps.clear();
m_ps_cb.reset();
m_ps_ss.clear();
m_om_dss.clear();
m_om_bs.clear();
m_rs.reset();
// m_state stores raw view pointers carrying a manually added reference (see the AddRef() in
// BeginPresent()), so they are released by hand here.
if (m_state.rt_view)
{
m_state.rt_view->Release();
m_state.rt_view = nullptr;
}
m_state.cached_rt_view = nullptr;
if (m_state.dsv)
{
m_state.dsv->Release();
m_state.dsv = nullptr;
}
m_state.cached_dsv = nullptr;
m_shader_cache.Close();
#ifdef REPORT_LEAKED_OBJECTS
// Grab the debug interface while the device is still referenced by m_dev; it is used for
// the live-object report after everything else has been released.
wil::com_ptr_nothrow<ID3D11Debug> debug;
m_dev.try_query_to(&debug);
#endif
m_annotation.reset();
m_ctx.reset();
m_dev.reset();
m_dxgi_factory.reset();
#ifdef REPORT_LEAKED_OBJECTS
if (debug)
debug->ReportLiveDeviceObjects(D3D11_RLDO_DETAIL | D3D11_RLDO_IGNORE_INTERNAL);
#endif
}
// Fills in the feature flags and limits that depend on the created device: compressed
// texture support, vertex shader expansion, CAS sharpening and the maximum texture size.
// The adapter argument is not used by this implementation.
void GSDevice11::SetFeatures(IDXGIAdapter1* adapter)
{
	const bool is_fl11_or_better = (m_feature_level >= D3D_FEATURE_LEVEL_11_0);

	// Check all three formats, since the feature means any can be used.
	// Stops at the first unsupported format.
	bool all_dxt_supported = true;
	for (const auto bc_format : {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC3_UNORM})
	{
		if (!SupportsTextureFormat(m_dev.get(), bc_format))
		{
			all_dxt_supported = false;
			break;
		}
	}
	m_features.dxt_textures = all_dxt_supported;
	m_features.bptc_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC7_UNORM);

	// Both of these need feature level 11.0; vertex shader expansion can also be turned off by config.
	m_features.vs_expand = (is_fl11_or_better && !GSConfig.DisableVertexShaderExpand);
	m_features.cas_sharpening = is_fl11_or_better;

	if (is_fl11_or_better)
		m_max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
	else
		m_max_texture_size = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
}
// Reports whether a swap chain currently exists.
bool GSDevice11::HasSurface() const
{
return static_cast<bool>(m_swap_chain);
}
// Applies a new vsync mode and present-throttle setting. The swap chain is recreated only
// when one exists and the new mode needs a different number of buffers.
void GSDevice11::SetVSyncMode(GSVSyncMode mode, bool allow_present_throttle)
{
	m_allow_present_throttle = allow_present_throttle;

	// Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
	const bool mailbox_in_exclusive_fullscreen = (m_is_exclusive_fullscreen && mode == GSVSyncMode::Mailbox);
	if (mailbox_in_exclusive_fullscreen)
	{
		WARNING_LOG("D3D11: Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
		mode = GSVSyncMode::FIFO;
	}

	// Nothing to do when the effective mode is unchanged.
	if (mode == m_vsync_mode)
		return;

	// The buffer count is derived from m_vsync_mode, so sample it before and after the switch.
	const u32 previous_buffer_count = GetSwapChainBufferCount();
	m_vsync_mode = mode;

	const bool needs_new_swap_chain = m_swap_chain && (GetSwapChainBufferCount() != previous_buffer_count);
	if (!needs_new_swap_chain)
		return;

	DestroySwapChain();
	if (!CreateSwapChain())
		pxFailRel("D3D11: Failed to recreate swap chain after vsync change.");
}
// Number of swap chain buffers wanted for the current vsync mode.
u32 GSDevice11::GetSwapChainBufferCount() const
{
	// With triple buffering, we need three.
	if (m_vsync_mode == GSVSyncMode::Mailbox)
		return 3;

	// With vsync off, we only need two buffers. Same for blocking vsync.
	return 2;
}
bool GSDevice11::CreateSwapChain()
{
constexpr DXGI_FORMAT swap_chain_format = DXGI_FORMAT_R8G8B8A8_UNORM;
if (m_window_info.type != WindowInfo::Type::Win32)
return false;
const HWND window_hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
RECT client_rc{};
GetClientRect(window_hwnd, &client_rc);
DXGI_MODE_DESC fullscreen_mode;
wil::com_ptr_nothrow<IDXGIOutput> fullscreen_output;
if (Host::IsFullscreen())
{
u32 fullscreen_width, fullscreen_height;
float fullscreen_refresh_rate;
m_is_exclusive_fullscreen =
GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) &&
D3D::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.get(), window_hwnd, fullscreen_width,
fullscreen_height, fullscreen_refresh_rate, swap_chain_format, &fullscreen_mode,
fullscreen_output.put());
// Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
if (m_vsync_mode == GSVSyncMode::Mailbox && m_is_exclusive_fullscreen)
{
WARNING_LOG("D3D11: Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
m_vsync_mode = GSVSyncMode::FIFO;
}
}
else
{
m_is_exclusive_fullscreen = false;
}
m_using_flip_model_swap_chain = !GSConfig.UseBlitSwapChain || m_is_exclusive_fullscreen;
DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
swap_chain_desc.Width = static_cast<u32>(client_rc.right - client_rc.left);
swap_chain_desc.Height = static_cast<u32>(client_rc.bottom - client_rc.top);
swap_chain_desc.Format = swap_chain_format;
swap_chain_desc.SampleDesc.Count = 1;
swap_chain_desc.BufferCount = GetSwapChainBufferCount();
swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swap_chain_desc.SwapEffect =
m_using_flip_model_swap_chain ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_DISCARD;
m_using_allow_tearing = (m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen);
if (m_using_allow_tearing)
swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
HRESULT hr = S_OK;
if (m_is_exclusive_fullscreen)
{
DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc;
DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {};
fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
fs_sd_desc.Width = fullscreen_mode.Width;
fs_sd_desc.Height = fullscreen_mode.Height;
fs_desc.RefreshRate = fullscreen_mode.RefreshRate;
fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering;
fs_desc.Scaling = fullscreen_mode.Scaling;
fs_desc.Windowed = FALSE;
Console.WriteLn("D3D11: Creating a %dx%d exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height);
hr = m_dxgi_factory->CreateSwapChainForHwnd(
m_dev.get(), window_hwnd, &fs_sd_desc, &fs_desc, fullscreen_output.get(), m_swap_chain.put());
if (FAILED(hr))
{
Console.Warning("D3D11: Failed to create fullscreen swap chain, trying windowed.");
m_is_exclusive_fullscreen = false;
m_using_allow_tearing = m_allow_tearing_supported && m_using_flip_model_swap_chain;
}
}
if (!m_is_exclusive_fullscreen)
{
Console.WriteLn("D3D11: Creating a %dx%d %s windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height,
m_using_flip_model_swap_chain ? "flip-discard" : "discard");
hr = m_dxgi_factory->CreateSwapChainForHwnd(
m_dev.get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, m_swap_chain.put());
}
if (FAILED(hr) && m_using_flip_model_swap_chain)
{
Console.Warning("D3D11: Failed to create a flip-discard swap chain, trying discard.");
swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
swap_chain_desc.Flags = 0;
m_using_flip_model_swap_chain = false;
m_using_allow_tearing = false;
hr = m_dxgi_factory->CreateSwapChainForHwnd(
m_dev.get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, m_swap_chain.put());
if (FAILED(hr))
{
Console.Error("D3D11: CreateSwapChainForHwnd failed: 0x%08X", hr);
return false;
}
}
// MWA needs to be called on the correct factory.
wil::com_ptr_nothrow<IDXGIFactory> swap_chain_factory;
hr = m_swap_chain->GetParent(IID_PPV_ARGS(swap_chain_factory.put()));
if (SUCCEEDED(hr))
{
hr = swap_chain_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
if (FAILED(hr))
Console.ErrorFmt("D3D11: MakeWindowAssociation() to disable ALT+ENTER failed: {}", Error::CreateHResult(hr).GetDescription());
}
else
{
Console.ErrorFmt("D3D11: GetParent() on swap chain to get factory failed: {}", Error::CreateHResult(hr).GetDescription());
}
if (!CreateSwapChainRTV())
{
DestroySwapChain();
return false;
}
// Render a frame as soon as possible to clear out whatever was previously being displayed.
m_ctx->ClearRenderTargetView(m_swap_chain_rtv.get(), s_present_clear_color.data());
m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
return true;
}
bool GSDevice11::CreateSwapChainRTV()
{
wil::com_ptr_nothrow<ID3D11Texture2D> backbuffer;
HRESULT hr = m_swap_chain->GetBuffer(0, IID_PPV_ARGS(backbuffer.put()));
if (FAILED(hr))
{
Console.Error("D3D11: GetBuffer for RTV failed: 0x%08X", hr);
return false;
}
D3D11_TEXTURE2D_DESC backbuffer_desc;
backbuffer->GetDesc(&backbuffer_desc);
CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(
D3D11_RTV_DIMENSION_TEXTURE2D, backbuffer_desc.Format, 0, 0, backbuffer_desc.ArraySize);
hr = m_dev->CreateRenderTargetView(backbuffer.get(), &rtv_desc, m_swap_chain_rtv.put());
if (FAILED(hr))
{
Console.Error("D3D11: CreateRenderTargetView for swap chain failed: 0x%08X", hr);
m_swap_chain_rtv.reset();
return false;
}
m_window_info.surface_width = backbuffer_desc.Width;
m_window_info.surface_height = backbuffer_desc.Height;
DevCon.WriteLn("D3D11: Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height);
if (m_window_info.type == WindowInfo::Type::Win32)
{
BOOL fullscreen = FALSE;
DXGI_SWAP_CHAIN_DESC desc;
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen &&
SUCCEEDED(m_swap_chain->GetDesc(&desc)))
{
m_window_info.surface_refresh_rate = static_cast<float>(desc.BufferDesc.RefreshRate.Numerator) /
static_cast<float>(desc.BufferDesc.RefreshRate.Denominator);
}
}
return true;
}
void GSDevice11::DestroySwapChain()
{
if (!m_swap_chain)
return;
m_swap_chain_rtv.reset();
// switch out of fullscreen before destroying
BOOL is_fullscreen;
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen)
m_swap_chain->SetFullscreenState(FALSE, nullptr);
m_swap_chain.reset();
m_is_exclusive_fullscreen = false;
}
// Drops the current swap chain, re-acquires the window and, unless the window is
// surfaceless, creates a fresh swap chain for it. Returns false if either step fails.
bool GSDevice11::UpdateWindow()
{
	DestroySwapChain();

	if (!AcquireWindow(false))
		return false;

	const bool has_surface = (m_window_info.type != WindowInfo::Type::Surfaceless);
	if (!has_surface || CreateSwapChain())
		return true;

	Console.WriteLn("D3D11: Failed to create swap chain on updated window");
	return false;
}
// Destroys the presentation surface; for this backend that means destroying the swap chain.
void GSDevice11::DestroySurface()
{
DestroySwapChain();
}
std::string GSDevice11::GetDriverInfo() const
{
std::string ret = "Unknown Feature Level";
static constexpr std::array<std::tuple<D3D_FEATURE_LEVEL, const char*>, 2> feature_level_names = {{
{D3D_FEATURE_LEVEL_10_0, "D3D_FEATURE_LEVEL_10_0"},
{D3D_FEATURE_LEVEL_11_0, "D3D_FEATURE_LEVEL_11_0"},
}};
for (size_t i = 0; i < std::size(feature_level_names); i++)
{
if (m_feature_level == std::get<0>(feature_level_names[i]))
{
ret = std::get<1>(feature_level_names[i]);
break;
}
}
ret += "\n";
wil::com_ptr_nothrow<IDXGIDevice> dxgi_dev;
if (m_dev.try_query_to(&dxgi_dev))
{
wil::com_ptr_nothrow<IDXGIAdapter> dxgi_adapter;
if (SUCCEEDED(dxgi_dev->GetAdapter(dxgi_adapter.put())))
{
DXGI_ADAPTER_DESC desc;
if (SUCCEEDED(dxgi_adapter->GetDesc(&desc)))
{
ret += StringUtil::StdStringFromFormat("VID: 0x%04X PID: 0x%04X\n", desc.VendorId, desc.DeviceId);
ret += StringUtil::WideStringToUTF8String(desc.Description);
ret += "\n";
const std::string driver_version(D3D::GetDriverVersionFromLUID(desc.AdapterLuid));
if (!driver_version.empty())
{
ret += "Driver Version: ";
ret += driver_version;
}
}
}
}
return ret;
}
// Handles a window size/scale change. Ignored without a swap chain or while in exclusive
// fullscreen. The scale is always stored; the swap chain buffers are resized only when
// the size actually differs from the recorded surface size.
void GSDevice11::ResizeWindow(u32 new_window_width, u32 new_window_height, float new_window_scale)
{
	if (m_is_exclusive_fullscreen || !m_swap_chain)
		return;

	m_window_info.surface_scale = new_window_scale;

	const bool size_changed =
		(m_window_info.surface_width != new_window_width || m_window_info.surface_height != new_window_height);
	if (!size_changed)
		return;

	// Drop the existing view before the buffers are resized.
	m_swap_chain_rtv.reset();

	// Zero sizes and DXGI_FORMAT_UNKNOWN are passed, so the new dimensions are not taken
	// from the arguments of this function.
	HRESULT hr = m_swap_chain->ResizeBuffers(
		0, 0, 0, DXGI_FORMAT_UNKNOWN, m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0);
	if (FAILED(hr))
		Console.Error("D3D11: ResizeBuffers() failed: 0x%08X", hr);

	// A failed resize is only logged; failing to rebuild the view is fatal.
	if (!CreateSwapChainRTV())
		pxFailRel("Failed to recreate swap chain RTV after resize");
}
// This backend always reports exclusive fullscreen as supported.
bool GSDevice11::SupportsExclusiveFullscreen() const
{
return true;
}
// Prepares the swap chain back buffer for presentation: clears it, binds it as the only
// render target, and sets the viewport/scissor to the window size.
// Returns FrameSkipped when skipping was requested, when there is no swap chain, or when
// exclusive fullscreen has been lost; otherwise returns OK.
GSDevice::PresentResult GSDevice11::BeginPresent(bool frame_skip)
{
	if (frame_skip || !m_swap_chain)
		return PresentResult::FrameSkipped;

	// Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
	// This might get called repeatedly if it takes a while to switch back, that's the host's problem.
	if (m_is_exclusive_fullscreen)
	{
		BOOL still_fullscreen;
		const bool query_failed = FAILED(m_swap_chain->GetFullscreenState(&still_fullscreen, nullptr));
		if (query_failed || !still_fullscreen)
		{
			Host::RunOnCPUThread([]() { Host::SetFullscreen(false); });
			return PresentResult::FrameSkipped;
		}
	}

	// When using vsync, the time here seems to include the time for the buffer to become available.
	// This blows up our GPU usage number considerably, so read the timestamp before the final blit
	// in this configuration. It does reduce accuracy a little, but better than seeing 100% all of
	// the time, when it's more like a couple of percent.
	if (m_vsync_mode == GSVSyncMode::FIFO && m_gpu_timing_enabled)
		PopTimestampQuery();

	m_ctx->ClearRenderTargetView(m_swap_chain_rtv.get(), s_present_clear_color.data());
	m_ctx->OMSetRenderTargets(1, m_swap_chain_rtv.addressof(), nullptr);

	// Mirror the new binding in the state cache: the cache holds its own reference to the
	// bound render target view, and no depth-stencil view is bound.
	if (m_state.rt_view)
		m_state.rt_view->Release();
	m_state.rt_view = m_swap_chain_rtv.get();
	m_state.rt_view->AddRef();
	m_state.cached_rt_view = nullptr;

	if (m_state.dsv)
	{
		m_state.dsv->Release();
		m_state.dsv = nullptr;
	}
	m_state.cached_dsv = nullptr;

	g_perfmon.Put(GSPerfMon::RenderPasses, 1);

	const GSVector2i window_size = GetWindowSize();
	SetViewport(window_size);
	SetScissor(GSVector4i::loadh(window_size));

	return PresentResult::OK;
}
void GSDevice11::EndPresent()
{
RenderImGui();
// See note in BeginPresent() for why it's conditional on vsync-off.
if (m_vsync_mode != GSVSyncMode::FIFO && m_gpu_timing_enabled)
PopTimestampQuery();
const UINT sync_interval = static_cast<UINT>(m_vsync_mode == GSVSyncMode::FIFO);
const UINT flags = (m_vsync_mode == GSVSyncMode::Disabled && m_using_allow_tearing) ? DXGI_PRESENT_ALLOW_TEARING : 0;
m_swap_chain->Present(sync_interval, flags);
if (m_gpu_timing_enabled)
KickTimestampQuery();
// clear out the swap chain view, it might get resized..
OMSetRenderTargets(nullptr, nullptr, nullptr, nullptr);
}
bool GSDevice11::CreateTimestampQueries()
{
for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++)
{
for (u32 j = 0; j < 3; j++)
{
const CD3D11_QUERY_DESC qdesc((j == 0) ? D3D11_QUERY_TIMESTAMP_DISJOINT : D3D11_QUERY_TIMESTAMP);
const HRESULT hr = m_dev->CreateQuery(&qdesc, m_timestamp_queries[i][j].put());
if (FAILED(hr))
{
m_timestamp_queries = {};
return false;
}
}
}
KickTimestampQuery();
return true;
}
// Releases all timestamp queries and resets the bookkeeping around them.
// Does nothing when the queries were never created.
void GSDevice11::DestroyTimestampQueries()
{
	const bool queries_exist = static_cast<bool>(m_timestamp_queries[0][0]);
	if (!queries_exist)
		return;

	// If a measurement is in flight, issue End() on slot 1 of the current write set before releasing it.
	if (m_timestamp_query_started)
		m_ctx->End(m_timestamp_queries[m_write_timestamp_query][1].get());

	m_timestamp_queries = {};

	m_timestamp_query_started = false;
	m_waiting_timestamp_queries = 0;
	m_write_timestamp_query = 0;
	m_read_timestamp_query = 0;
}
// Collects the results of every pending timestamp set that is already available, adding
// the elapsed GPU time (in milliseconds) to m_accumulated_gpu_time, then closes the
// measurement that is currently in flight, if any.
void GSDevice11::PopTimestampQuery()
{
	while (m_waiting_timestamp_queries > 0)
	{
		auto& read_set = m_timestamp_queries[m_read_timestamp_query];

		// Stop at the first set whose disjoint result is not ready yet.
		D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
		const HRESULT disjoint_hr = m_ctx->GetData(read_set[0].get(), &disjoint,
			sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH);
		if (disjoint_hr != S_OK)
			break;

		if (disjoint.Disjoint)
		{
			// The timestamps of a disjoint interval are not used; throw away all pending sets.
			DevCon.WriteLn("D3D11: GPU timing disjoint, resetting.");
			m_read_timestamp_query = 0;
			m_write_timestamp_query = 0;
			m_waiting_timestamp_queries = 0;
			m_timestamp_query_started = false;
			continue; // nothing is waiting any more, so the loop ends here
		}

		u64 start = 0, end = 0;
		const HRESULT start_hr = m_ctx->GetData(read_set[1].get(), &start,
			sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH);
		const HRESULT end_hr = m_ctx->GetData(read_set[2].get(), &end,
			sizeof(end), D3D11_ASYNC_GETDATA_DONOTFLUSH);
		if (start_hr != S_OK || end_hr != S_OK)
			break;

		// Frequency is in ticks per second; dividing by 1000 yields ticks per millisecond.
		const double ticks_per_ms = static_cast<double>(disjoint.Frequency) / 1000.0;
		m_accumulated_gpu_time += static_cast<float>(static_cast<double>(end - start) / ticks_per_ms);

		m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
		m_waiting_timestamp_queries--;
	}

	if (m_timestamp_query_started)
	{
		// Write the end timestamp first, then end the disjoint query.
		auto& write_set = m_timestamp_queries[m_write_timestamp_query];
		m_ctx->End(write_set[2].get());
		m_ctx->End(write_set[0].get());

		m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
		m_timestamp_query_started = false;
		m_waiting_timestamp_queries++;
	}
}
// Starts a new timing measurement in the current write set, unless one is already running,
// the queries do not exist, or every set is still waiting to be read back.
void GSDevice11::KickTimestampQuery()
{
	if (m_timestamp_query_started)
		return;
	if (!m_timestamp_queries[0][0])
		return;
	if (m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES)
		return;

	// Begin the disjoint query, then record the start timestamp.
	auto& write_set = m_timestamp_queries[m_write_timestamp_query];
	m_ctx->Begin(write_set[0].get());
	m_ctx->End(write_set[1].get());
	m_timestamp_query_started = true;
}
// Turns GPU timing on or off. Enabling creates the timestamp queries and returns whether
// that succeeded; disabling destroys them. Returns true when the state is already as requested.
bool GSDevice11::SetGPUTimingEnabled(bool enabled)
{
	if (enabled == m_gpu_timing_enabled)
		return true;

	m_gpu_timing_enabled = enabled;

	if (!m_gpu_timing_enabled)
	{
		DestroyTimestampQueries();
		return true;
	}

	return CreateTimestampQueries();
}
// Returns the GPU time accumulated by PopTimestampQuery() since the last call, and resets
// the accumulator to zero.
float GSDevice11::GetAndResetAccumulatedGPUTime()
{
// Take a copy before clearing so the caller receives the pre-reset total.
const float value = m_accumulated_gpu_time;
m_accumulated_gpu_time = 0.0f;
return value;
}
// Issues a non-indexed draw of m_vertex.count vertices starting at m_vertex.start.
// Counts one draw call in the performance monitor and calls PSUpdateShaderState() first.
void GSDevice11::DrawPrimitive()
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
PSUpdateShaderState(true, true);
m_ctx->Draw(m_vertex.count, m_vertex.start);
}
// Issues an indexed draw covering the whole current index range (m_index.start/count),
// with m_vertex.start as the base vertex. Counts one draw call in the performance monitor
// and calls PSUpdateShaderState() first.
void GSDevice11::DrawIndexedPrimitive()
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
PSUpdateShaderState(true, true);
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
}
// Issues an indexed draw for a sub-range of the current index range: `count` indices
// starting `offset` indices past m_index.start, with m_vertex.start as the base vertex.
void GSDevice11::DrawIndexedPrimitive(int offset, int count)
{
	// The requested sub-range must lie inside the current index range.
	pxAssert(offset + count <= static_cast<int>(m_index.count));

	g_perfmon.Put(GSPerfMon::DrawCalls, 1);
	PSUpdateShaderState(true, true);
	m_ctx->DrawIndexed(count, m_index.start + offset, m_vertex.start);
}
// Applies a pending clear or invalidation to a render target / depth stencil texture
// and marks it Dirty. Textures of other types, and textures that are already Dirty,
// are left untouched.
void GSDevice11::CommitClear(GSTexture* t)
{
	GSTexture11* const tex = static_cast<GSTexture11*>(t);
	if (!tex->IsRenderTargetOrDepthStencil())
		return;

	const auto state = tex->GetState();
	if (state == GSTexture::State::Dirty)
		return;

	// Invalidated contents are discarded; any other state is resolved with a real clear.
	const bool discard = (state == GSTexture::State::Invalidated);

	if (!tex->IsDepthStencil())
	{
		if (discard)
			m_ctx->DiscardView(static_cast<ID3D11RenderTargetView*>(*tex));
		else
			m_ctx->ClearRenderTargetView(*tex, tex->GetUNormClearColor().F32);
	}
	else
	{
		// Only the depth plane is cleared (D3D11_CLEAR_DEPTH).
		if (discard)
			m_ctx->DiscardView(static_cast<ID3D11DepthStencilView*>(*tex));
		else
			m_ctx->ClearDepthStencilView(*tex, D3D11_CLEAR_DEPTH, tex->GetClearDepth(), 0);
	}

	tex->SetState(GSTexture::State::Dirty);
}
// Opens a named event on the debug annotation interface, if one is available.
// fmt/...: printf-style format and arguments for the UTF-8 event name.
void GSDevice11::PushDebugGroup(const char* fmt, ...)
{
	if (!m_annotation)
		return;

	std::va_list args;
	va_start(args, fmt);
	const std::string label = StringUtil::StdStringFromFormatV(fmt, args);
	va_end(args);

	// The annotation interface takes wide strings.
	const std::wstring wide_label = StringUtil::UTF8StringToWideString(label);
	m_annotation->BeginEvent(wide_label.c_str());
}
// Closes the most recently opened debug annotation event, if an annotation interface is available.
void GSDevice11::PopDebugGroup()
{
	if (m_annotation)
		m_annotation->EndEvent();
}
// Places a single marker on the debug annotation interface, if one is available.
// category: not used by this implementation.
// fmt/...:  printf-style format and arguments for the UTF-8 marker text.
void GSDevice11::InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...)
{
	if (!m_annotation)
		return;

	std::va_list args;
	va_start(args, fmt);
	const std::string message = StringUtil::StdStringFromFormatV(fmt, args);
	va_end(args);

	// The annotation interface takes wide strings.
	const std::wstring wide_message = StringUtil::UTF8StringToWideString(message);
	m_annotation->SetMarker(wide_message.c_str());
}
// Allocates a single-sample, default-usage 2D texture and wraps it in a GSTexture11.
// type:   selects the bind flags (see below); unknown types get no bind flags.
// levels: number of mip levels.
// Returns a newly allocated GSTexture11, or nullptr if the D3D11 allocation fails.
GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
	D3D11_TEXTURE2D_DESC desc = {};
	desc.Usage = D3D11_USAGE_DEFAULT;
	desc.Format = GSTexture11::GetDXGIFormat(format);
	desc.Width = width;
	desc.Height = height;
	desc.MipLevels = levels;
	desc.ArraySize = 1;
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;

	if (type == GSTexture::Type::RenderTarget)
	{
		desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
	}
	else if (type == GSTexture::Type::DepthStencil)
	{
		desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE;
	}
	else if (type == GSTexture::Type::Texture)
	{
		// Uncompressed textures with more than one level are created with the
		// GENERATE_MIPS flag, and are bound as render targets as well.
		const bool generate_mips = (levels > 1) && !GSTexture::IsCompressedFormat(format);
		desc.BindFlags = generate_mips ? (D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE) : D3D11_BIND_SHADER_RESOURCE;
		desc.MiscFlags = generate_mips ? D3D11_RESOURCE_MISC_GENERATE_MIPS : 0;
	}
	else if (type == GSTexture::Type::RWTexture)
	{
		desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
	}

	wil::com_ptr_nothrow<ID3D11Texture2D> texture;
	const HRESULT hr = m_dev->CreateTexture2D(&desc, nullptr, texture.put());
	if (SUCCEEDED(hr))
		return new GSTexture11(std::move(texture), desc, type, format);

	Console.Error("D3D11: Failed to allocate %dx%d surface", width, height);
	return nullptr;
}
// Creates a download texture of the given size and format via GSDownloadTexture11::Create().
std::unique_ptr<GSDownloadTexture> GSDevice11::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
	std::unique_ptr<GSDownloadTexture> download_texture = GSDownloadTexture11::Create(width, height, format);
	return download_texture;
}
// Copies rectangle r of sTex into dTex at (destX, destY).
// Depth stencil sources are only copied whole, and only when both textures have the same size.
// Pending clears are either carried forward to the destination or committed before the copy.
// The destination is marked Dirty afterwards.
void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)
{
	// Nothing to copy for an empty rectangle.
	if (r.rempty())
	{
		GL_INS("D3D11: CopyRect rect empty.");
		return;
	}

	const bool depth_copy = sTex->IsDepthStencil();
	const GSVector4i src_bounds(0, 0, sTex->GetWidth(), sTex->GetHeight());
	const GSVector4i dst_bounds(0, 0, dTex->GetWidth(), dTex->GetHeight());
	const bool same_size = src_bounds.eq(dst_bounds);

	// Partial depth copies are not supported, so a depth copy needs matching sizes.
	if (depth_copy && !same_size)
	{
		GL_INS("D3D11: CopyRect rect mismatch for full depth copy.");
		return;
	}

	// True when the copy replaces the whole destination.
	const bool overwrites_destination = depth_copy || dst_bounds.eq(r);

	if (sTex->GetState() == GSTexture::State::Cleared)
	{
		// A cleared source can hand its clear over to a render target / depth destination.
		if (dTex->IsRenderTargetOrDepthStencil() && ProcessClearsBeforeCopy(sTex, dTex, overwrites_destination))
			return;

		// Otherwise the source clear has to be executed before it can be read.
		CommitClear(sTex);
	}

	g_perfmon.Put(GSPerfMon::TextureCopies, 1);

	// A cleared destination that is only partly overwritten must have its clear executed first.
	if (!overwrites_destination && dTex->GetState() == GSTexture::State::Cleared)
		CommitClear(dTex);

	GSTexture11* const src11 = static_cast<GSTexture11*>(sTex);
	GSTexture11* const dst11 = static_cast<GSTexture11*>(dTex);

	// CopyResource is used for depth copies and for whole-texture colour copies (faster than
	// CopySubresourceRegion); everything else is a boxed sub-region copy.
	const bool whole_colour_copy = (destX == 0 && destY == 0 && r.eq(src_bounds));
	if (same_size && (depth_copy || whole_colour_copy))
	{
		m_ctx->CopyResource(*dst11, *src11);
	}
	else
	{
		const D3D11_BOX box = {static_cast<UINT>(r.left), static_cast<UINT>(r.top), 0U, static_cast<UINT>(r.right), static_cast<UINT>(r.bottom), 1U};
		m_ctx->CopySubresourceRegion(*dst11, 0, destX, destY, 0, *src11, 0, &box);
	}

	dTex->SetState(GSTexture::State::Dirty);
}
// Stretch-blits sTex into dTex with one of the built-in convert shaders,
// using the blend state that corresponds to the given colour write mask.
void GSDevice11::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
	GSHWDrawConfig::ColorMaskSelector cms, ShaderConvert shader, bool linear)
{
	ID3D11PixelShader* const convert_ps = m_convert.ps[static_cast<int>(shader)].get();
	ID3D11BlendState* const masked_bs = m_convert.bs[cms.wrgba].get();

	DoStretchRect(sTex, sRect, dTex, dRect, convert_ps, nullptr, masked_bs, linear);
}
// Stretch-blits sTex into dTex with a caller-supplied pixel shader and constant buffer,
// writing all colour channels.
void GSDevice11::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear)
{
	ID3D11BlendState* const write_all_bs = m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get();

	DoStretchRect(sTex, sRect, dTex, dRect, ps, ps_cb, write_all_bs, linear);
}
// Draws sTex into dTex as a single quad (4-vertex triangle strip).
// sRect: source texture coordinates, passed through to the vertices unchanged.
// dRect: destination rectangle in pixels of the target.
// dTex:  destination; when null no render target is bound here and the window surface
//        size is used to normalise dRect. A depth stencil dTex is bound as the depth target.
// ps / ps_cb / bs: pixel shader, its constant buffer and the blend state for the draw.
// linear: selects the linear sampler instead of the point sampler.
void GSDevice11::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear)
{
	// The source must hold real contents before it is sampled.
	CommitClear(sTex);

	const bool depth_target = (dTex != nullptr) && dTex->IsDepthStencil();

	GSVector2i target_size;
	if (!dTex)
	{
		target_size = GSVector2i(m_window_info.surface_width, m_window_info.surface_height);
	}
	else
	{
		// The destination must not stay bound as a shader resource while it is a target.
		PSUnbindConflictingSRVs(dTex);
		target_size = dTex->GetSize();
		OMSetRenderTargets(depth_target ? nullptr : dTex, depth_target ? dTex : nullptr);
	}

	// Output merger state.
	OMSetDepthStencilState(depth_target ? m_convert.dss_write.get() : m_convert.dss.get(), 0);
	OMSetBlendState(bs, 0);

	// Input assembler: destination pixels -> [-1, 1] with Y pointing up.
	const float x0 = dRect.x * 2 / target_size.x - 1.0f;
	const float y0 = 1.0f - dRect.y * 2 / target_size.y;
	const float x1 = dRect.z * 2 / target_size.x - 1.0f;
	const float y1 = 1.0f - dRect.w * 2 / target_size.y;

	GSVertexPT1 quad[] =
	{
		{GSVector4(x0, y0, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)},
		{GSVector4(x1, y0, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)},
		{GSVector4(x0, y1, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)},
		{GSVector4(x1, y1, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)},
	};

	IASetVertexBuffer(quad, sizeof(quad[0]), std::size(quad));
	IASetInputLayout(m_convert.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// Vertex shader.
	VSSetShader(m_convert.vs.get(), nullptr);

	// Pixel shader, source texture and sampler.
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
	PSSetShader(ps, ps_cb);

	DrawPrimitive();
}
// Draws sTex as a single quad using one of the present shaders.
// dTex: destination render target; when null no render target is bound here and the
//       window surface size is used as the target size.
// shader / shaderTime: present shader selection and the time value uploaded to it.
// linear: selects the linear sampler instead of the point sampler.
void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear)
{
	// The source must hold real contents before it is sampled.
	CommitClear(sTex);

	GSVector2i target_size;
	if (!dTex)
	{
		target_size = GSVector2i(m_window_info.surface_width, m_window_info.surface_height);
	}
	else
	{
		// The destination must not stay bound as a shader resource while it is a target.
		PSUnbindConflictingSRVs(dTex);
		target_size = dTex->GetSize();
		OMSetRenderTargets(dTex, nullptr);
	}

	// Constants consumed by the present pixel shader.
	DisplayConstantBuffer display_cb;
	display_cb.SetSource(sRect, sTex->GetSize());
	display_cb.SetTarget(dRect, target_size);
	display_cb.SetTime(shaderTime);
	m_ctx->UpdateSubresource(m_present.ps_cb.get(), 0, nullptr, &display_cb, 0, 0);

	// Output merger state: all colour channels written.
	OMSetDepthStencilState(m_convert.dss.get(), 0);
	OMSetBlendState(m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get(), 0);

	// Input assembler: destination pixels -> [-1, 1] with Y pointing up.
	const float x0 = dRect.x * 2 / target_size.x - 1.0f;
	const float y0 = 1.0f - dRect.y * 2 / target_size.y;
	const float x1 = dRect.z * 2 / target_size.x - 1.0f;
	const float y1 = 1.0f - dRect.w * 2 / target_size.y;

	GSVertexPT1 quad[] =
	{
		{GSVector4(x0, y0, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)},
		{GSVector4(x1, y0, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)},
		{GSVector4(x0, y1, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)},
		{GSVector4(x1, y1, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)},
	};

	IASetVertexBuffer(quad, sizeof(quad[0]), std::size(quad));
	IASetInputLayout(m_present.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// Vertex shader.
	VSSetShader(m_present.vs.get(), nullptr);

	// Pixel shader, source texture and sampler.
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
	PSSetShader(m_present.ps[static_cast<u32>(shader)].get(), m_present.ps_cb.get());

	DrawPrimitive();
}
// Renders a CLUT (palette) row into dTex from sTex using the CLUT_4 / CLUT_8 convert shader.
// sScale, offsetX, offsetY, dOffset: uploaded to the shader through the merge constant buffer.
// dSize: width in pixels of the destination row; 16 selects CLUT_4, anything else CLUT_8.
void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Layout must match what the CLUT shaders read from the merge constant buffer.
	struct Uniforms
	{
		float scale;
		float pad1[3];
		u32 offsetX, offsetY, dOffset;
		u32 pad2; // rounds the struct up to a whole 16-byte register
	};
	// UpdateSubresource() with a null destination box copies the whole destination buffer, and
	// constant buffers are sized in multiples of 16 bytes, so the source must cover full
	// 16-byte registers or the copy reads past the end of this struct.
	// NOTE(review): assumes m_merge.cb is 32 bytes, like the 32-byte Uniforms uploaded to it
	// by FilteredDownsampleTexture() - confirm against the buffer's creation.
	static_assert(sizeof(Uniforms) % 16 == 0, "constant buffer data must cover whole 16-byte registers");

	const Uniforms cb = {sScale, {}, offsetX, offsetY, dOffset, 0};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);

	const GSVector4 dRect(0, 0, dSize, 1);
	const ShaderConvert shader = (dSize == 16) ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), m_merge.cb.get(), nullptr, false);
}
// Converts sTex into an 8-bit indexed texture covering the whole of dTex.
// sScale, SBW, DBW, SPSM: uploaded to the shader through the merge constant buffer.
// SPSM also selects the shader: (SPSM & 0xE) == 0 uses RGBA_TO_8I, otherwise RGB5A1_TO_8I.
// offsetX, offsetY and DPSM are not used by this implementation.
void GSDevice11::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	// Layout must match what the conversion shaders read from the merge constant buffer.
	struct Uniforms
	{
		float scale;
		float pad1[3];
		u32 SBW, DBW, SPSM;
		u32 pad2; // rounds the struct up to a whole 16-byte register
	};
	// UpdateSubresource() with a null destination box copies the whole destination buffer, and
	// constant buffers are sized in multiples of 16 bytes, so the source must cover full
	// 16-byte registers or the copy reads past the end of this struct.
	// NOTE(review): assumes m_merge.cb is 32 bytes, like the 32-byte Uniforms uploaded to it
	// by FilteredDownsampleTexture() - confirm against the buffer's creation.
	static_assert(sizeof(Uniforms) % 16 == 0, "constant buffer data must cover whole 16-byte registers");

	const Uniforms cb = {sScale, {}, SBW, DBW, SPSM, 0};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);

	const GSVector4 dRect(0, 0, dTex->GetWidth(), dTex->GetHeight());
	const ShaderConvert shader = ((SPSM & 0xE) == 0) ? ShaderConvert::RGBA_TO_8I : ShaderConvert::RGB5A1_TO_8I;
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), m_merge.cb.get(), nullptr, false);
}
// Downsamples sTex into dRect of dTex with the DOWNSAMPLE_COPY convert shader.
// downsample_factor: uploaded as-is, and squared to form the shader's weight.
// clamp_min:         uploaded to the shader unchanged.
void GSDevice11::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min, const GSVector4& dRect)
{
	// 32-byte block uploaded to the merge constant buffer.
	struct Uniforms
	{
		float weight;
		float step_multiplier;
		float pad0[2];
		GSVector2i clamp_min;
		int downsample_factor;
		int pad1;
	};

	const float weight = static_cast<float>(downsample_factor * downsample_factor);
	// The step is doubled for native scaling modes above Aggressive.
	const float step_multiplier = (GSConfig.UserHacks_NativeScaling > GSNativeScaling::Aggressive) ? 2.0f : 1.0f;

	const Uniforms cb = {weight, step_multiplier, {}, clamp_min, static_cast<int>(downsample_factor), 0};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);

	ID3D11PixelShader* const downsample_ps = m_convert.ps[static_cast<int>(ShaderConvert::DOWNSAMPLE_COPY)].get();
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, downsample_ps, m_merge.cb.get(), nullptr, false);
}
// Draws a list of stretch rects into dTex with one convert shader.
// Consecutive rects that share source texture, filter mode and colour write mask are
// submitted together as one batch through DoMultiStretchRects().
// rects / num_rects: the rect list; an empty list draws nothing.
// dTex: destination; bound as render target or as depth stencil depending on its type.
void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{
	// The batching below unconditionally reads rects[0] and always submits a final batch of
	// at least one rect, so an empty list must be rejected up front.
	if (num_rects == 0)
		return;

	IASetInputLayout(m_convert.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	VSSetShader(m_convert.vs.get(), nullptr);
	PSSetShader(m_convert.ps[static_cast<int>(shader)].get(), nullptr);

	// The destination must not stay bound as a shader resource while it is a target.
	PSUnbindConflictingSRVs(dTex);
	OMSetDepthStencilState(dTex->IsRenderTarget() ? m_convert.dss.get() : m_convert.dss_write.get(), 0);
	OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr);

	const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));

	// State of the batch currently being accumulated: [first, first + count).
	GSTexture* last_tex = rects[0].src;
	bool last_linear = rects[0].linear;
	u8 last_wmask = rects[0].wmask.wrgba;
	u32 first = 0;
	u32 count = 1;

	for (u32 i = 1; i < num_rects; i++)
	{
		if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
		{
			count++;
			continue;
		}

		// State changed: flush the accumulated batch and start a new one at rect i.
		DoMultiStretchRects(rects + first, count, ds);
		last_tex = rects[i].src;
		last_linear = rects[i].linear;
		last_wmask = rects[i].wmask.wrgba;
		first += count;
		count = 1;
	}

	// Flush the final batch.
	DoMultiStretchRects(rects + first, count, ds);
}
void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds)
{
// Don't use primitive restart here, it ends up slower on some drivers.
const u32 vertex_reserve_size = num_rects * 4;
const u32 index_reserve_size = num_rects * 6;
GSVertexPT1* verts = static_cast<GSVertexPT1*>(IAMapVertexBuffer(sizeof(GSVertexPT1), vertex_reserve_size));
u16* idx = IAMapIndexBuffer(index_reserve_size);
u32 icount = 0;
u32 vcount = 0;
for (u32 i = 0; i < num_rects; i++)
{
const GSVector4& sRect = rects[i].src_rect;
const GSVector4& dRect = rects[i].dst_rect;
const float left = dRect.x * 2 / ds.x - 1.0f;
const float top = 1.0f - dRect.y * 2 / ds.y;
const float right = dRect.z * 2 / ds.x - 1.0f;
const float bottom = 1.0f - dRect.w * 2 / ds.y;
const u32 vstart = vcount;
verts[vcount++] = {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)};
verts[vcount++] = {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)};
verts[vcount++] = {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)};
verts[vcount++] = {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)};
if (i > 0)
idx[icount++] = vstart;
idx[icount++] = vstart;
idx[icount++] = vstart + 1;
idx[icount++] = vstart + 2;
idx[icount++] = vstart + 3;
idx[icount++] = vstart + 3;
};
IAUnmapVertexBuffer(sizeof(GSVertexPT1), vcount);
IAUnmapIndexBuffer(icount);
IASetIndexBuffer(m_ib.get());
CommitClear(rects[0].src);
PSSetShaderResource(0, rects[0].src);
PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get());
OMSetBlendState(m_convert.bs[rects[0].wmask.wrgba].get(), 0.0f);
DrawIndexedPrimitive();
}
// Merges the two output circuits (sTex[0], sTex[1]) into dTex, and optionally writes a
// feedback copy into sTex[2].
// c: background colour; dTex is cleared to it before anything is drawn.
// PMODE / EXTBUF: GS registers that select which outputs are enabled, whether the
// background is blended (SLBG), the blend shader (MMOD) and the feedback source (FBIN).
void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c, const bool linear)
{
	const GSVector4 unit_rect(0.0f, 0.0f, 1.0f, 1.0f);

	GSTexture* const feedback_tex = sTex[2];
	const bool have_feedback_target = (feedback_tex != nullptr);
	const bool feedback_from_output2 = PMODE.EN2 && have_feedback_target && EXTBUF.FBIN == 1;
	const bool feedback_from_output1 = PMODE.EN1 && have_feedback_target && EXTBUF.FBIN == 0;
	const bool feedback2_over_background = feedback_from_output2 && PMODE.SLBG == 1;

	// Copies the current contents of dTex into the feedback texture through the YUV shader.
	const auto write_feedback = [&]() {
		DoStretchRect(dTex, unit_rect, feedback_tex, dRect[2], m_convert.ps[static_cast<int>(ShaderConvert::YUV)].get(),
			m_merge.cb.get(), nullptr, linear);
	};

	// Start from the background colour. It is what shows when the 2nd output is disabled or
	// SLBG is 1, and also everywhere outside of the drawn rectangles.
	ClearRenderTarget(dTex, c);

	// The merge constants are only needed by the feedback and blend passes.
	if (feedback_from_output2 || feedback_from_output1 || sTex[0])
	{
		const MergeConstantBuffer cb = {GSVector4::unorm8(c), EXTBUF.EMODA, EXTBUF.EMODC};
		m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);
	}

	// 2nd output enabled and selected: copy it to the destination so the 1st output can be
	// blended on top of it. Pixels outside of dRect keep the background colour.
	if (sTex[1] && (PMODE.SLBG == 0 || feedback2_over_background))
		StretchRect(sTex[1], sRect[1], dTex, PMODE.SLBG ? dRect[2] : dRect[1], ShaderConvert::COPY, linear);

	// Feedback taken from the 2nd output.
	if (feedback_from_output2)
		write_feedback();

	// The 2nd output was only drawn for the feedback write; bring the background back
	// before the normal merge.
	if (feedback2_over_background)
		ClearRenderTarget(dTex, c);

	// 1st output enabled: blend it over what is in the destination.
	if (sTex[0])
		DoStretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge.ps[PMODE.MMOD].get(), m_merge.cb.get(), m_merge.bs.get(), linear);

	// Feedback taken after the 1st output has been blended.
	if (feedback_from_output1)
		write_feedback();
}
// Runs one of the interlace pixel shaders from sTex into dTex after uploading its constants.
void GSDevice11::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb)
{
	ID3D11Buffer* const interlace_cb = m_interlace.cb.get();
	m_ctx->UpdateSubresource(interlace_cb, 0, nullptr, &cb, 0, 0);

	ID3D11PixelShader* const interlace_ps = m_interlace.ps[static_cast<int>(shader)].get();
	DoStretchRect(sTex, sRect, dTex, dRect, interlace_ps, interlace_cb, linear);
}
// Applies the FXAA pixel shader from the whole of sTex onto the whole of dTex.
// The shader is compiled on first use; if its source is missing or compilation fails,
// nothing is drawn.
void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{
	// Lazily build the shader.
	if (!m_fxaa_ps)
	{
		const std::optional<std::string> source = ReadShaderSource("shaders/common/fxaa.fx");
		if (!source.has_value())
		{
			Console.Error("D3D11: FXAA shader is missing");
			return;
		}

		ShaderMacro macros;
		macros.AddMacro("FXAA_HLSL", "1");
		m_fxaa_ps = m_shader_cache.GetPixelShader(m_dev.get(), *source, macros.GetPtr(), "main");
		if (!m_fxaa_ps)
			return;
	}

	const GSVector2i size = dTex->GetSize();
	const GSVector4 sRect(0, 0, 1, 1);
	const GSVector4 dRect(0, 0, size.x, size.y);

	DoStretchRect(sTex, sRect, dTex, dRect, m_fxaa_ps.get(), nullptr, true);
}
// Applies the shade boost pixel shader from the whole of sTex onto the whole of dTex.
// params: four floats uploaded unchanged as the shader's constant buffer.
void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4])
{
	ID3D11Buffer* const boost_cb = m_shadeboost.cb.get();
	m_ctx->UpdateSubresource(boost_cb, 0, nullptr, params, 0, 0);

	const GSVector2i size = dTex->GetSize();
	const GSVector4 sRect(0, 0, 1, 1);
	const GSVector4 dRect(0, 0, size.x, size.y);

	DoStretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps.get(), boost_cb, false);
}
// Binds the vertex shader, input layout and vertex constant buffer for a draw.
// sel: selects the shader variant; variants are compiled on first use and cached in m_vs.
// cb:  vertex shader constants; uploaded only when m_vs_cb_cache reports a change.
void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb)
{
	auto vs_it = std::as_const(m_vs).find(sel.key);
	if (vs_it == m_vs.end())
	{
		// Vertex layout consumed by "vs_main".
		static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] =
		{
			{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
		};

		ShaderMacro sm;
		sm.AddMacro("VERTEX_SHADER", 1);
		sm.AddMacro("VS_TME", sel.tme);
		sm.AddMacro("VS_FST", sel.fst);
		sm.AddMacro("VS_IIP", sel.iip);
		sm.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));

		GSVertexShader11 compiled;
		if (sel.expand != GSHWDrawConfig::VSExpand::None)
		{
			// Expanding variants use a separate entry point and get no input layout.
			compiled.vs = m_shader_cache.GetVertexShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "vs_main_expand");
		}
		else
		{
			m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), compiled.vs.put(), compiled.il.put(), layout,
				std::size(layout), m_tfx_source, sm.GetPtr(), "vs_main");
		}

		vs_it = m_vs.try_emplace(sel.key, std::move(compiled)).first;
	}

	// Skip the upload when the constants are unchanged.
	if (m_vs_cb_cache.Update(*cb))
		m_ctx->UpdateSubresource(m_vs_cb.get(), 0, nullptr, cb, 0, 0);

	const GSVertexShader11& entry = vs_it->second;
	VSSetShader(entry.vs.get(), m_vs_cb.get());
	IASetInputLayout(entry.il.get());
}
// Binds the pixel shader, its constant buffer and the texture sampler for a draw.
// sel:  selects the shader variant; variants are compiled on first use and cached in m_ps.
// cb:   pixel shader constants (may be null); uploaded only when m_ps_cb_cache reports a change.
// ssel: selects the sampler state; sampler states are created on first use and cached in m_ps_ss.
void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
{
	auto ps_it = std::as_const(m_ps).find(sel);
	if (ps_it == m_ps.end())
	{
		// One macro per selector field. The order is left exactly as it was.
		ShaderMacro sm;
		sm.AddMacro("PIXEL_SHADER", 1);
		sm.AddMacro("PS_FST", sel.fst);
		sm.AddMacro("PS_WMS", sel.wms);
		sm.AddMacro("PS_WMT", sel.wmt);
		sm.AddMacro("PS_ADJS", sel.adjs);
		sm.AddMacro("PS_ADJT", sel.adjt);
		sm.AddMacro("PS_AEM_FMT", sel.aem_fmt);
		sm.AddMacro("PS_AEM", sel.aem);
		sm.AddMacro("PS_TFX", sel.tfx);
		sm.AddMacro("PS_TCC", sel.tcc);
		sm.AddMacro("PS_DATE", sel.date);
		sm.AddMacro("PS_ATST", sel.atst);
		sm.AddMacro("PS_AFAIL", sel.afail);
		sm.AddMacro("PS_FOG", sel.fog);
		sm.AddMacro("PS_IIP", sel.iip);
		sm.AddMacro("PS_BLEND_HW", sel.blend_hw);
		sm.AddMacro("PS_A_MASKED", sel.a_masked);
		sm.AddMacro("PS_FBA", sel.fba);
		sm.AddMacro("PS_FBMASK", sel.fbmask);
		sm.AddMacro("PS_LTF", sel.ltf);
		sm.AddMacro("PS_TCOFFSETHACK", sel.tcoffsethack);
		sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler);
		sm.AddMacro("PS_REGION_RECT", sel.region_rect);
		sm.AddMacro("PS_SHUFFLE", sel.shuffle);
		sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
		sm.AddMacro("PS_PROCESS_BA", sel.process_ba);
		sm.AddMacro("PS_PROCESS_RG", sel.process_rg);
		sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
		sm.AddMacro("PS_READ16_SRC", sel.real16src);
		sm.AddMacro("PS_WRITE_RG", sel.write_rg);
		sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
		sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
		sm.AddMacro("PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);
		sm.AddMacro("PS_DST_FMT", sel.dst_fmt);
		sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
		sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
		sm.AddMacro("PS_COLCLIP_HW", sel.colclip_hw);
		sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
		sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
		sm.AddMacro("PS_COLCLIP", sel.colclip);
		sm.AddMacro("PS_BLEND_A", sel.blend_a);
		sm.AddMacro("PS_BLEND_B", sel.blend_b);
		sm.AddMacro("PS_BLEND_C", sel.blend_c);
		sm.AddMacro("PS_BLEND_D", sel.blend_d);
		sm.AddMacro("PS_BLEND_MIX", sel.blend_mix);
		sm.AddMacro("PS_ROUND_INV", sel.round_inv);
		sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a);
		sm.AddMacro("PS_PABE", sel.pabe);
		sm.AddMacro("PS_DITHER", sel.dither);
		sm.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust);
		sm.AddMacro("PS_ZCLAMP", sel.zclamp);
		sm.AddMacro("PS_ZFLOOR", sel.zfloor);
		sm.AddMacro("PS_SCANMSK", sel.scanmsk);
		sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod);
		sm.AddMacro("PS_MANUAL_LOD", sel.manual_lod);
		sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
		sm.AddMacro("PS_NO_COLOR", sel.no_color);
		sm.AddMacro("PS_NO_COLOR1", sel.no_color1);

		wil::com_ptr_nothrow<ID3D11PixelShader> compiled = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
		ps_it = m_ps.try_emplace(sel, std::move(compiled)).first;
	}

	// Skip the upload when there are no constants or they are unchanged.
	if (cb && m_ps_cb_cache.Update(*cb))
		m_ctx->UpdateSubresource(m_ps_cb.get(), 0, nullptr, cb, 0, 0);

	// The sampler stays null when tfx == 4.
	// NOTE(review): presumably tfx 4 means the draw samples no texture - confirm.
	wil::com_ptr_nothrow<ID3D11SamplerState> sampler;
	if (sel.tfx != 4)
	{
		// Palette formats and wrap modes >= 3 are expected to come with bilinear disabled.
		if (sel.pal_fmt || sel.wms >= 3 || sel.wmt >= 3)
		{
			pxAssert(ssel.biln == 0);
		}

		const auto cached = std::as_const(m_ps_ss).find(ssel.key);
		if (cached == m_ps_ss.end())
		{
			D3D11_SAMPLER_DESC sd = {};
			const int anisotropy = GSConfig.MaxAnisotropy;

			if (anisotropy <= 1 || !ssel.aniso)
			{
				// Indexed by bit 0 = min linear, bit 1 = mag linear, bit 2 = mip linear.
				static constexpr std::array<D3D11_FILTER, 8> filters = {{
					D3D11_FILTER_MIN_MAG_MIP_POINT, // 000 / min=point,mag=point,mip=point
					D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT, // 001 / min=linear,mag=point,mip=point
					D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT, // 010 / min=point,mag=linear,mip=point
					D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT, // 011 / min=linear,mag=linear,mip=point
					D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR, // 100 / min=point,mag=point,mip=linear
					D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, // 101 / min=linear,mag=point,mip=linear
					D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR, // 110 / min=point,mag=linear,mip=linear
					D3D11_FILTER_MIN_MAG_MIP_LINEAR, // 111 / min=linear,mag=linear,mip=linear
				}};

				const u8 min_bit = static_cast<u8>(ssel.IsMinFilterLinear());
				const u8 mag_bit = static_cast<u8>(ssel.IsMagFilterLinear());
				const u8 mip_bit = static_cast<u8>(ssel.IsMipFilterLinear());
				const u8 index = min_bit | (mag_bit << 1) | (mip_bit << 2);
				sd.Filter = filters[index];
			}
			else
			{
				sd.Filter = D3D11_FILTER_ANISOTROPIC;
			}

			sd.AddressU = ssel.tau ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP;
			sd.AddressV = ssel.tav ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP;
			sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
			sd.MinLOD = 0.0f;
			// LOD range is capped at 0.25 when clamped or when mipmap filtering is off.
			sd.MaxLOD = (ssel.lodclamp || !ssel.UseMipmapFiltering()) ? 0.25f : FLT_MAX;
			sd.MaxAnisotropy = std::clamp(anisotropy, 1, 16);
			sd.ComparisonFunc = D3D11_COMPARISON_NEVER;

			// The result is cached even if creation failed and left the pointer null.
			m_dev->CreateSamplerState(&sd, &sampler);
			m_ps_ss[ssel.key] = sampler;
		}
		else
		{
			sampler = cached->second;
		}
	}

	PSSetSamplerState(sampler.get());
	PSSetShader(ps_it->second.get(), m_ps_cb.get());
}
// Binds the depth stencil state and blend state for a draw.
// dssel: selects the depth stencil state; states are created on first use and cached in m_om_dss.
// bsel:  selects the blend state; states are created on first use and cached in m_om_bs.
// afix:  passed through to OMSetBlendState() together with the blend state.
void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix)
{
	auto dss_it = std::as_const(m_om_dss).find(dssel.key);
	if (dss_it == m_om_dss.end())
	{
		D3D11_DEPTH_STENCIL_DESC dsd;
		memset(&dsd, 0, sizeof(dsd));

		if (dssel.date)
		{
			// Both faces use the same stencil setup: pass on equality, and on pass either
			// zero the stencil (date_one) or keep it.
			D3D11_DEPTH_STENCILOP_DESC face;
			face.StencilFunc = D3D11_COMPARISON_EQUAL;
			face.StencilPassOp = dssel.date_one ? D3D11_STENCIL_OP_ZERO : D3D11_STENCIL_OP_KEEP;
			face.StencilFailOp = D3D11_STENCIL_OP_KEEP;
			face.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;

			dsd.StencilEnable = true;
			dsd.StencilReadMask = 1;
			dsd.StencilWriteMask = 1;
			dsd.FrontFace = face;
			dsd.BackFace = face;
		}

		// Depth stays disabled when the test always passes and nothing is written.
		if (dssel.zwe || dssel.ztst != ZTST_ALWAYS)
		{
			// Indexed by dssel.ztst.
			static const D3D11_COMPARISON_FUNC depth_funcs[] =
			{
				D3D11_COMPARISON_NEVER,
				D3D11_COMPARISON_ALWAYS,
				D3D11_COMPARISON_GREATER_EQUAL,
				D3D11_COMPARISON_GREATER
			};

			dsd.DepthEnable = true;
			dsd.DepthWriteMask = dssel.zwe ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
			dsd.DepthFunc = depth_funcs[dssel.ztst];
		}

		wil::com_ptr_nothrow<ID3D11DepthStencilState> created;
		m_dev->CreateDepthStencilState(&dsd, created.put());
		dss_it = m_om_dss.try_emplace(dssel.key, std::move(created)).first;
	}
	OMSetDepthStencilState(dss_it->second.get(), 1);

	auto bs_it = std::as_const(m_om_bs).find(bsel.key);
	if (bs_it == m_om_bs.end())
	{
		D3D11_BLEND_DESC bd;
		memset(&bd, 0, sizeof(bd));
		D3D11_RENDER_TARGET_BLEND_DESC& rt0 = bd.RenderTarget[0];

		if (bsel.blend.IsEffective(bsel.colormask))
		{
			// clang-format off
			// Indexed by the selector's factor / op fields.
			static constexpr std::array<D3D11_BLEND, 16> blend_factors = { {
				D3D11_BLEND_SRC_COLOR, D3D11_BLEND_INV_SRC_COLOR, D3D11_BLEND_DEST_COLOR, D3D11_BLEND_INV_DEST_COLOR,
				D3D11_BLEND_SRC1_COLOR, D3D11_BLEND_INV_SRC1_COLOR, D3D11_BLEND_SRC_ALPHA, D3D11_BLEND_INV_SRC_ALPHA,
				D3D11_BLEND_DEST_ALPHA, D3D11_BLEND_INV_DEST_ALPHA, D3D11_BLEND_SRC1_ALPHA, D3D11_BLEND_INV_SRC1_ALPHA,
				D3D11_BLEND_BLEND_FACTOR, D3D11_BLEND_INV_BLEND_FACTOR, D3D11_BLEND_ONE, D3D11_BLEND_ZERO
			} };
			static constexpr std::array<D3D11_BLEND_OP, 4> blend_ops = { {
				D3D11_BLEND_OP_ADD, D3D11_BLEND_OP_SUBTRACT, D3D11_BLEND_OP_REV_SUBTRACT, D3D11_BLEND_OP_MIN
			} };
			// clang-format on

			rt0.BlendEnable = bsel.blend.enable;
			rt0.BlendOp = blend_ops[bsel.blend.op];
			rt0.SrcBlend = blend_factors[bsel.blend.src_factor];
			rt0.DestBlend = blend_factors[bsel.blend.dst_factor];
			rt0.BlendOpAlpha = D3D11_BLEND_OP_ADD;
			rt0.SrcBlendAlpha = blend_factors[bsel.blend.src_factor_alpha];
			rt0.DestBlendAlpha = blend_factors[bsel.blend.dst_factor_alpha];
		}

		// Per-channel colour write mask.
		if (bsel.colormask.wr)
			rt0.RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_RED;
		if (bsel.colormask.wg)
			rt0.RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_GREEN;
		if (bsel.colormask.wb)
			rt0.RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_BLUE;
		if (bsel.colormask.wa)
			rt0.RenderTargetWriteMask |= D3D11_COLOR_WRITE_ENABLE_ALPHA;

		wil::com_ptr_nothrow<ID3D11BlendState> created;
		m_dev->CreateBlendState(&bd, created.put());
		bs_it = m_om_bs.try_emplace(bsel.key, std::move(created)).first;
	}
	OMSetBlendState(bs_it->second.get(), afix);
}
bool GSDevice11::CreateCASShaders()
{
CD3D11_BUFFER_DESC desc(NUM_CAS_CONSTANTS * sizeof(u32), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
HRESULT hr = m_dev->CreateBuffer(&desc, nullptr, m_cas.cb.put());
if (FAILED(hr))
return false;
std::optional<std::string> cas_source = ReadShaderSource("shaders/dx11/cas.hlsl");
if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value()))
return false;
static constexpr D3D_SHADER_MACRO sharpen_only_macros[] = {
{"CAS_SHARPEN_ONLY", "1"},
{nullptr, nullptr}};
m_cas.cs_sharpen = m_shader_cache.GetComputeShader(m_dev.get(), cas_source.value(), sharpen_only_macros, "main");
m_cas.cs_upscale = m_shader_cache.GetComputeShader(m_dev.get(), cas_source.value(), nullptr, "main");
if (!m_cas.cs_sharpen || !m_cas.cs_upscale)
{
Console.Error("D3D11: Failed to create CAS compute shaders.");
return false;
}
return true;
}
// Runs the CAS compute shader, reading sTex and writing dTex through an unordered access view.
// sharpen_only: selects the sharpen-only shader instead of the upscaling one.
// constants:    uploaded unchanged as the shader's constant buffer.
// Always returns true.
bool GSDevice11::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
{
	// Each thread group covers a 16x16 region of the destination; round the group count up.
	static constexpr int group_region_dim = 16;
	const int groups_x = (dTex->GetWidth() + (group_region_dim - 1)) / group_region_dim;
	const int groups_y = (dTex->GetHeight() + (group_region_dim - 1)) / group_region_dim;

	ID3D11ShaderResourceView* srvs[1] = {*static_cast<GSTexture11*>(sTex)};
	ID3D11UnorderedAccessView* uavs[1] = {*static_cast<GSTexture11*>(dTex)};

	// Unbind all render targets before the textures are bound to the compute stage.
	m_ctx->OMSetRenderTargets(0, nullptr, nullptr);

	m_ctx->UpdateSubresource(m_cas.cb.get(), 0, nullptr, constants.data(), 0, 0);
	m_ctx->CSSetConstantBuffers(0, 1, m_cas.cb.addressof());
	if (sharpen_only)
		m_ctx->CSSetShader(m_cas.cs_sharpen.get(), nullptr, 0);
	else
		m_ctx->CSSetShader(m_cas.cs_upscale.get(), nullptr, 0);
	m_ctx->CSSetShaderResources(0, std::size(srvs), srvs);
	m_ctx->CSSetUnorderedAccessViews(0, std::size(uavs), uavs, nullptr);

	m_ctx->Dispatch(groups_x, groups_y, 1);

	// Clear the compute bindings again to prevent hazards.
	srvs[0] = nullptr;
	uavs[0] = nullptr;
	m_ctx->CSSetShaderResources(0, std::size(srvs), srvs);
	m_ctx->CSSetUnorderedAccessViews(0, std::size(uavs), uavs, nullptr);

	return true;
}
bool GSDevice11::CreateImGuiResources()
{
HRESULT hr;
const std::optional<std::string> hlsl = ReadShaderSource("shaders/dx11/imgui.fx");
if (!hlsl.has_value())
{
Console.Error("D3D11: Failed to read imgui.fx");
return false;
}
// clang-format off
static constexpr D3D11_INPUT_ELEMENT_DESC layout[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(ImDrawVert, uv), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)offsetof(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
// clang-format on
if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_imgui.vs.put(), m_imgui.il.put(), layout,
std::size(layout), hlsl.value(), nullptr, "vs_main") ||
!(m_imgui.ps = m_shader_cache.GetPixelShader(m_dev.get(), hlsl.value(), nullptr, "ps_main")))
{
Console.Error("D3D11: Failed to compile ImGui shaders");
return false;
}
D3D11_BLEND_DESC blend_desc = {};
blend_desc.RenderTarget[0].BlendEnable = true;
blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
blend_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
hr = m_dev->CreateBlendState(&blend_desc, m_imgui.bs.put());
if (FAILED(hr))
{
Console.Error("D3D11: CreateImGuiResources(): CreateBlendState() failed: %08X", hr);
return false;
}
D3D11_BUFFER_DESC buffer_desc = {};
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
buffer_desc.ByteWidth = sizeof(float) * 4 * 4;
buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
hr = m_dev->CreateBuffer(&buffer_desc, nullptr, m_imgui.vs_cb.put());
if (FAILED(hr))
{
Console.Error("D3D11: CreateImGuiResources(): CreateBlendState() failed: %08X", hr);
return false;
}
return true;
}
// Renders the current ImGui frame with the D3D11 context.
// Vertices of each command list are copied into the shared vertex buffer (m_vb), indices are
// uploaded through IASetIndexBuffer(), and one DrawIndexed() is issued per ImGui command.
void GSDevice11::RenderImGui()
{
ImGui::Render();
const ImDrawData* draw_data = ImGui::GetDrawData();
// Nothing to draw this frame.
if (draw_data->CmdListsCount == 0)
return;
UpdateImGuiTextures();
// Orthographic projection built from the window surface size, uploaded as the vertex shader constants.
const float L = 0.0f;
const float R = static_cast<float>(m_window_info.surface_width);
const float T = 0.0f;
const float B = static_cast<float>(m_window_info.surface_height);
// clang-format off
const float ortho_projection[4][4] =
{
{ 2.0f/(R-L), 0.0f, 0.0f, 0.0f },
{ 0.0f, 2.0f/(T-B), 0.0f, 0.0f },
{ 0.0f, 0.0f, 0.5f, 0.0f },
{ (R+L)/(L-R), (T+B)/(B-T), 0.5f, 1.0f },
};
// clang-format on
m_ctx->UpdateSubresource(m_imgui.vs_cb.get(), 0, nullptr, ortho_projection, 0, 0);
// Bind the shared vertex buffer with the ImDrawVert stride for the duration of this function.
const UINT vb_stride = sizeof(ImDrawVert);
const UINT vb_offset = 0;
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset);
IASetInputLayout(m_imgui.il.get());
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
PSSetShader(m_imgui.ps.get(), nullptr);
OMSetBlendState(m_imgui.bs.get(), 0.0f);
OMSetDepthStencilState(m_convert.dss.get(), 0);
PSSetSamplerState(m_convert.ln.get());
// Render command lists
for (int n = 0; n < draw_data->CmdListsCount; n++)
{
const ImDrawList* cmd_list = draw_data->CmdLists[n];
// This mess is because the vertex size isn't the same...
// The write position is aligned up to sizeof(ImDrawVert) so it can be expressed as a vertex
// index, and the end position is aligned up to sizeof(GSVertexPT1) before it is stored back.
u32 vertex_offset;
{
static_assert(std::has_single_bit(sizeof(GSVertexPT1)));
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
const u32 unaligned_size = cmd_list->VtxBuffer.Size * sizeof(ImDrawVert);
u32 start_pos = Common::AlignUp(m_vb_pos, sizeof(ImDrawVert));
u32 end_pos = Common::AlignUpPow2(start_pos + unaligned_size, sizeof(GSVertexPT1));
// Not enough room left: restart at the beginning of the buffer and map with DISCARD.
if (end_pos > VERTEX_BUFFER_SIZE)
{
type = D3D11_MAP_WRITE_DISCARD;
m_vb_pos = 0;
start_pos = 0;
end_pos = Common::AlignUpPow2(unaligned_size, sizeof(GSVertexPT1));
}
m_vb_pos = end_pos;
// Offset of this list's first vertex, in ImDrawVert units.
vertex_offset = start_pos / sizeof(ImDrawVert);
D3D11_MAPPED_SUBRESOURCE sr;
const HRESULT hr = m_ctx->Map(m_vb.get(), 0, type, 0, &sr);
// If the buffer cannot be mapped, this command list is skipped entirely.
if (FAILED(hr))
continue;
std::memcpy(static_cast<u8*>(sr.pData) + start_pos, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert));
m_ctx->Unmap(m_vb.get(), 0);
}
// ImGui indices are uploaded as-is, which requires them to be 16-bit.
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
{
const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
// User callbacks are not supported here.
pxAssert(!pcmd->UserCallback);
// Skip commands whose clip rectangle is empty or inverted (right <= left or bottom <= top).
const GSVector4 clip = GSVector4::load<false>(&pcmd->ClipRect);
if ((clip.zwzw() <= clip.xyxy()).mask() != 0)
continue;
// Only touch the scissor rect when it differs from the cached one.
const GSVector4i iclip = GSVector4i(clip);
if (!m_state.scissor.eq(iclip))
{
m_state.scissor = iclip;
m_ctx->RSSetScissorRects(1, reinterpret_cast<const D3D11_RECT*>(&iclip));
}
// Since we don't have the GSTexture...
// The ImGui texture ID is reinterpreted as a shader resource view and placed directly in the cached PS state.
m_state.ps_sr_views[0] = reinterpret_cast<ID3D11ShaderResourceView*>(pcmd->GetTexID());
PSUpdateShaderState(true, true);
m_ctx->DrawIndexed(pcmd->ElemCount, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
}
// Counts every command in the list, including those skipped by the clip test above.
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
}
// Rebind the vertex buffer with the stride recorded in m_state.
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
}
// Destination alpha test setup: clears the stencil of `ds`, then draws a quad covering `bbox`
// that samples `rt` using the convert shader selected by `datm`, with only `ds` bound as target.
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, SetDATM datm, const GSVector4i& bbox)
{
	// sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows

	CommitClear(rt);
	CommitClear(ds);

	// Reset the stencil plane to zero before the DATE pass writes to it.
	ID3D11DepthStencilView* const stencil_view = *static_cast<GSTexture11*>(ds);
	m_ctx->ClearDepthStencilView(stencil_view, D3D11_CLEAR_STENCIL, 0.0f, 0);

	// The depth texture is about to become the target; drop any SRV slot still referencing it.
	PSUnbindConflictingSRVs(ds);

	// Output merger: depth/stencil only, no colour target.
	OMSetDepthStencilState(m_date.dss.get(), 1);
	OMSetBlendState(m_date.bs.get(), 0);
	OMSetRenderTargets(nullptr, ds);

	// Input assembler: a four-vertex strip covering bbox.
	// `uv` is bbox normalised by the depth texture size, `ndc` is the same rectangle scaled to [-1, 1].
	const GSVector4 uv = GSVector4(bbox) / GSVector4(ds->GetSize()).xyxy();
	const GSVector4 ndc = uv * 2.0f - 1.0f;

	const GSVertexPT1 quad[] =
	{
		{GSVector4(ndc.x, -ndc.y, 0.5f, 1.0f), GSVector2(uv.x, uv.y)},
		{GSVector4(ndc.z, -ndc.y, 0.5f, 1.0f), GSVector2(uv.z, uv.y)},
		{GSVector4(ndc.x, -ndc.w, 0.5f, 1.0f), GSVector2(uv.x, uv.w)},
		{GSVector4(ndc.z, -ndc.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)},
	};

	IASetVertexBuffer(quad, sizeof(quad[0]), 4);
	IASetInputLayout(m_convert.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// Vertex shader.
	VSSetShader(m_convert.vs.get(), nullptr);

	// Pixel shader: sample the render target with the point sampler.
	PSSetShaderResource(0, rt);
	PSSetSamplerState(m_convert.pt.get());
	PSSetShader(m_convert.ps[SetDATMShader(datm)].get(), nullptr);

	// Issue the draw.
	DrawPrimitive();
}
// Reserves room for `count` vertices of `stride` bytes in the streaming vertex buffer and maps it.
// Returns a pointer to the reserved region, or nullptr if the request exceeds the buffer size or
// the map fails. m_vertex.start receives the index of the first reserved vertex.
void* GSDevice11::IAMapVertexBuffer(u32 stride, u32 count)
{
	const u32 bytes_needed = stride * count;
	if (bytes_needed > VERTEX_BUFFER_SIZE)
		return nullptr;

	// Round the write position up to a whole multiple of the stride.
	m_vertex.start = (m_vb_pos + (stride - 1)) / stride;
	m_vb_pos = (m_vertex.start * stride) + bytes_needed;

	// When the reservation runs past the end, restart at offset zero and discard the old contents.
	const bool wrapped = (m_vb_pos > VERTEX_BUFFER_SIZE);
	if (wrapped)
	{
		m_vertex.start = 0;
		m_vb_pos = bytes_needed;
	}

	const D3D11_MAP map_type = wrapped ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;

	D3D11_MAPPED_SUBRESOURCE mapped;
	const HRESULT hr = m_ctx->Map(m_vb.get(), 0, map_type, 0, &mapped);
	if (FAILED(hr))
		return nullptr;

	return static_cast<u8*>(mapped.pData) + (m_vertex.start * stride);
}
// Unmaps the streaming vertex buffer, records the vertex count, and rebinds the buffer
// when the stride differs from the one tracked in m_state.
void GSDevice11::IAUnmapVertexBuffer(u32 stride, u32 count)
{
	m_ctx->Unmap(m_vb.get(), 0);
	m_vertex.count = count;

	// Same stride as the tracked binding: nothing to rebind.
	if (m_state.vb_stride == stride)
		return;

	m_state.vb_stride = stride;

	const UINT zero_offset = 0;
	m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &stride, &zero_offset);
}
// Copies `count` vertices of `stride` bytes from `vertex` into the streaming vertex buffer.
// Returns false when the buffer could not be mapped.
bool GSDevice11::IASetVertexBuffer(const void* vertex, u32 stride, u32 count)
{
	if (void* const dst = IAMapVertexBuffer(stride, count))
	{
		GSVector4i::storent(dst, vertex, count * stride);
		IAUnmapVertexBuffer(stride, count);
		return true;
	}

	return false;
}
bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count)
{
const u32 size = stride * count;
if (size > VERTEX_BUFFER_SIZE)
return false;
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
m_vertex.start = (m_structured_vb_pos + (stride - 1)) / stride;
m_structured_vb_pos = (m_vertex.start * stride) + size;
if (m_structured_vb_pos > VERTEX_BUFFER_SIZE)
{
m_vertex.start = 0;
m_structured_vb_pos = size;
type = D3D11_MAP_WRITE_DISCARD;
}
D3D11_MAPPED_SUBRESOURCE m;
if (FAILED(m_ctx->Map(m_expand_vb.get(), 0, type, 0, &m)))
return false;
void* map = static_cast<u8*>(m.pData) + (m_vertex.start * stride);
GSVector4i::storent(map, vertex, count * stride);
m_ctx->Unmap(m_expand_vb.get(), 0);
m_vertex.count = count;
return true;
}
// Reserves `count` 16-bit indices in the streaming index buffer and maps it.
// Returns a pointer to the reserved region, or nullptr if `count` exceeds the buffer
// capacity or the map fails. m_index.start receives the first reserved index position.
u16* GSDevice11::IAMapIndexBuffer(u32 count)
{
	// Capacity of the buffer in indices (m_ib_pos is counted in indices, not bytes).
	const auto capacity = INDEX_BUFFER_SIZE / sizeof(u16);
	if (count > capacity)
		return nullptr;

	m_index.start = m_ib_pos;
	m_ib_pos += count;

	// When the reservation runs past the end, restart at zero and discard the old contents.
	const bool wrapped = (m_ib_pos > capacity);
	if (wrapped)
	{
		m_index.start = 0;
		m_ib_pos = count;
	}

	const D3D11_MAP map_type = wrapped ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;

	D3D11_MAPPED_SUBRESOURCE mapped;
	const HRESULT hr = m_ctx->Map(m_ib.get(), 0, map_type, 0, &mapped);
	if (FAILED(hr))
		return nullptr;

	return static_cast<u16*>(mapped.pData) + m_index.start;
}
// Unmaps the streaming index buffer and records how many indices the pending draw uses.
void GSDevice11::IAUnmapIndexBuffer(u32 count)
{
m_ctx->Unmap(m_ib.get(), 0);
// Stored alongside m_index.start, which IAMapIndexBuffer() set when reserving the range.
m_index.count = count;
}
bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
{
u16* map = IAMapIndexBuffer(count);
if (!map)
return false;
std::memcpy(map, index, count * sizeof(u16));
IAUnmapIndexBuffer(count);
IASetIndexBuffer(m_ib.get());
return true;
}
// Binds `buffer` as the 16-bit index buffer, skipping the call when it is already the tracked binding.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
{
	if (m_state.index_buffer == buffer)
		return;

	m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R16_UINT, 0);
	m_state.index_buffer = buffer;
}
// Binds `layout` as the input layout, skipping the call when it is already the tracked binding.
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
	if (m_state.layout == layout)
		return;

	m_state.layout = layout;
	m_ctx->IASetInputLayout(layout);
}
// Sets the primitive topology, skipping the call when it matches the tracked value.
void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology)
{
	if (m_state.topology == topology)
		return;

	m_state.topology = topology;
	m_ctx->IASetPrimitiveTopology(topology);
}
// Binds the vertex shader and its constant buffer (slot 0).
// Each is compared against the tracked state separately and only changed bindings are sent.
void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
{
	const bool shader_changed = (m_state.vs != vs);
	if (shader_changed)
	{
		m_ctx->VSSetShader(vs, nullptr, 0);
		m_state.vs = vs;
	}

	const bool cb_changed = (m_state.vs_cb != vs_cb);
	if (cb_changed)
	{
		m_ctx->VSSetConstantBuffers(0, 1, &vs_cb);
		m_state.vs_cb = vs_cb;
	}
}
void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
{
m_state.ps_sr_views[i] = *static_cast<GSTexture11*>(sr);
}
// Records `ss0` as the sampler for pixel shader slot 0.
// Only the tracked state is written here; PSUpdateShaderState() sends it to the device context.
void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0)
{
m_state.ps_ss[0] = ss0;
}
// Empties m_ps_ss.
// NOTE(review): presumably the cache of created sampler state objects; the container's
// declaration is outside this chunk, confirm against the header.
void GSDevice11::ClearSamplerCache()
{
m_ps_ss.clear();
}
// Binds the pixel shader and its constant buffer (slot 0).
// Each is compared against the tracked state separately and only changed bindings are sent.
void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
{
	const bool shader_changed = (m_state.ps != ps);
	if (shader_changed)
	{
		m_ctx->PSSetShader(ps, nullptr, 0);
		m_state.ps = ps;
	}

	const bool cb_changed = (m_state.ps_cb != ps_cb);
	if (cb_changed)
	{
		m_ctx->PSSetConstantBuffers(0, 1, &ps_cb);
		m_state.ps_cb = ps_cb;
	}
}
// Sends the pending pixel shader resource views and/or samplers to the device context.
// `sr_update` / `ss_update` select which tables are checked; a table is only submitted when at
// least one slot differs from the cached copy of what was last submitted.
void GSDevice11::PSUpdateShaderState(const bool sr_update, const bool ss_update)
{
	// Shader resource caching requires srv/rtv hazards to be resolved, ensure PSUnbindConflictingSRVs handle.
	if (sr_update)
	{
		// Advance past the leading slots that still match the cached table.
		const size_t view_count = m_state.ps_sr_views.size();
		size_t slot = 0;
		while (slot < view_count && m_state.ps_cached_sr_views[slot] == m_state.ps_sr_views[slot])
			++slot;

		// Stopping early means a mismatch was found.
		if (slot != view_count)
		{
			m_state.ps_cached_sr_views = m_state.ps_sr_views;
			m_ctx->PSSetShaderResources(0, m_state.ps_sr_views.size(), m_state.ps_sr_views.data());
		}
	}

	if (ss_update)
	{
		// Same comparison for the sampler table.
		const size_t sampler_count = m_state.ps_ss.size();
		size_t slot = 0;
		while (slot < sampler_count && m_state.ps_cached_ss[slot] == m_state.ps_ss[slot])
			++slot;

		if (slot != sampler_count)
		{
			m_state.ps_cached_ss = m_state.ps_ss;
			m_ctx->PSSetSamplers(0, m_state.ps_ss.size(), m_state.ps_ss.data());
		}
	}
}
// Clears every pixel shader resource slot that references `tex1` or `tex2` (either may be null),
// and pushes the updated table to the device context if any slot was cleared.
void GSDevice11::PSUnbindConflictingSRVs(GSTexture* tex1, GSTexture* tex2)
{
	// Make sure no SRVs are bound using the same texture before binding it to a RTV.
	bool any_cleared = false;

	for (auto& view : m_state.ps_sr_views)
	{
		const bool conflicts =
			(tex1 && view == *static_cast<GSTexture11*>(tex1)) ||
			(tex2 && view == *static_cast<GSTexture11*>(tex2));
		if (!conflicts)
			continue;

		view = nullptr;
		any_cleared = true;
	}

	// Only the resource table changed; samplers are untouched.
	if (any_cleared)
		PSUpdateShaderState(true, false);
}
// Binds the depth-stencil state with stencil reference `sref`.
// The call is skipped when the state object is unchanged and either the state is null or the
// reference value also matches.
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref)
{
	const bool same_state = (m_state.dss == dss);
	const bool same_ref = (!dss || m_state.sref == sref);
	if (same_state && same_ref)
		return;

	m_state.dss = dss;
	m_state.sref = sref;
	m_ctx->OMSetDepthStencilState(dss, sref);
}
// Binds the blend state with blend factor `bf`, passed to D3D as bf / 128 in every channel.
// The call is skipped when the state object is unchanged and either the state is null or the
// factor also matches.
void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, u8 bf)
{
	const bool same_state = (m_state.bs == bs);
	const bool same_factor = (!bs || m_state.bf == bf);
	if (same_state && same_factor)
		return;

	m_state.bs = bs;
	m_state.bf = bf;

	const GSVector4 factor(static_cast<float>(bf) / 128.0f);
	m_ctx->OMSetBlendState(bs, factor.v, 0xffffffff);
}
// Binds `rt` and `ds` (either may be null) as the output targets.
// When `read_only_dsv` is provided it is bound as the depth view instead of the view of `ds`.
// The viewport is set to the target size, and the scissor to `scissor` or the full target.
// The tracked views hold a COM reference, released here when the view is replaced.
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor, ID3D11DepthStencilView* read_only_dsv)
{
	// Resolve the colour view, committing any pending clear first.
	ID3D11RenderTargetView* new_rtv = nullptr;
	if (rt)
	{
		CommitClear(rt);
		new_rtv = *static_cast<GSTexture11*>(rt);
	}

	// Resolve the depth view; an explicit read-only view takes priority over `ds`.
	ID3D11DepthStencilView* new_dsv = read_only_dsv;
	if (!read_only_dsv && ds)
	{
		CommitClear(ds);
		new_dsv = *static_cast<GSTexture11*>(ds);
	}

	const bool rtv_changed = (m_state.rt_view != new_rtv);
	const bool dsv_changed = (m_state.dsv != new_dsv);
	const bool changed = (rtv_changed || dsv_changed);
	g_perfmon.Put(GSPerfMon::RenderPasses, static_cast<double>(changed));

	if (rtv_changed)
	{
		// Swap the reference held on the tracked colour view.
		if (m_state.rt_view)
			m_state.rt_view->Release();
		if (new_rtv)
			new_rtv->AddRef();

		m_state.rt_view = new_rtv;
		m_state.cached_rt_view = rt;
	}

	if (dsv_changed)
	{
		// Swap the reference held on the tracked depth view.
		if (m_state.dsv)
			m_state.dsv->Release();
		if (new_dsv)
			new_dsv->AddRef();

		m_state.dsv = new_dsv;
		m_state.cached_dsv = ds;
	}

	if (changed)
		m_ctx->OMSetRenderTargets(1, &new_rtv, new_dsv);

	// Without any target texture there is no size to derive the viewport from.
	if (!rt && !ds)
		return;

	const GSVector2i target_size = rt ? rt->GetSize() : ds->GetSize();
	SetViewport(target_size);
	SetScissor(scissor ? *scissor : GSVector4i::loadh(target_size));
}
void GSDevice11::SetViewport(const GSVector2i& viewport)
{
if (m_state.viewport != viewport)
{
m_state.viewport = viewport;
const D3D11_VIEWPORT vp = {
0.0f, 0.0f, static_cast<float>(viewport.x), static_cast<float>(viewport.y), 0.0f, 1.0f};
m_ctx->RSSetViewports(1, &vp);
}
}
void GSDevice11::SetScissor(const GSVector4i& scissor)
{
static_assert(sizeof(D3D11_RECT) == sizeof(GSVector4i));
if (!m_state.scissor.eq(scissor))
{
m_state.scissor = scissor;
m_ctx->RSSetScissorRects(1, reinterpret_cast<const D3D11_RECT*>(&scissor));
}
}
void GSDevice11::ShaderMacro::AddMacro(const char* n, int d)
{
AddMacro(n, std::to_string(d));
}
// Appends a macro named `n` with definition `d` to the macro list.
// `d` is taken by value and moved into the new entry.
void GSDevice11::ShaderMacro::AddMacro(const char* n, std::string d)
{
mlist.emplace_back(n, std::move(d));
}
// Rebuilds the output array from the macro list and returns it as a D3D_SHADER_MACRO array
// ending in a {nullptr, nullptr} entry.
// The returned pointers refer to strings owned by `mlist` and storage owned by `mout`.
D3D_SHADER_MACRO* GSDevice11::ShaderMacro::GetPtr()
{
	mout.clear();

	for (const auto& macro : mlist)
		mout.emplace_back(macro.name.c_str(), macro.def.c_str());

	// Terminating entry.
	mout.emplace_back(nullptr, nullptr);

	return reinterpret_cast<D3D_SHADER_MACRO*>(mout.data());
}
// Executes one hardware draw described by `config`.
//
// In order, this:
//  - resolves or sets up the colour clip render target,
//  - sets up destination alpha testing (PrimID tracking texture, or stencil via SetupDATE),
//  - uploads vertices and indices and binds textures, shaders and output state,
//  - issues the main draw, then the optional blend multi-pass and alpha second pass,
//  - recycles temporary textures and resolves the colour clip target if requested.
//
// `config` is modified during the draw (e.g. ps selector bits, colclip_update_area, rt).
// If a temporary allocation or a buffer upload fails, the draw is aborted with a log message.
void GSDevice11::RenderHW(GSHWDrawConfig& config)
{
	const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();

	GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();

	if (colclip_rt)
	{
		if (config.colclip_mode == GSHWDrawConfig::ColClipMode::EarlyResolve)
		{
			// Resolve the existing colour clip target back into the RT before drawing.
			const GSVector2i size = config.rt->GetSize();
			const GSVector4 dRect(config.colclip_update_area);
			const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
			StretchRect(colclip_rt, sRect, config.rt, dRect, ShaderConvert::COLCLIP_RESOLVE, false);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
			Recycle(colclip_rt);

			g_gs_device->SetColorClipTexture(nullptr);

			colclip_rt = nullptr;
		}
		else
			config.ps.colclip_hw = 1;
	}

	if (config.ps.colclip_hw)
	{
		if (!colclip_rt)
		{
			// No colour clip target yet: create one and initialize it from the RT.
			config.colclip_update_area = config.drawarea;

			colclip_rt = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::ColorClip, false);
			if (!colclip_rt)
			{
				Console.Warning("D3D11: Failed to allocate ColorClip render target, aborting draw.");
				return;
			}

			g_gs_device->SetColorClipTexture(colclip_rt);

			const GSVector4 dRect = GSVector4((config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertOnly) ? GSVector4i::loadh(rtsize) : config.drawarea);
			const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
			StretchRect(config.rt, sRect, colclip_rt, dRect, ShaderConvert::COLCLIP_INIT, false);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
		}
	}

	// Destination Alpha Setup
	const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
	GSTexture* primid_texture = nullptr;
	if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
	{
		primid_texture = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
		if (!primid_texture)
		{
			Console.Warning("D3D11: Failed to allocate DATE image, aborting draw.");
			return;
		}

		DoStretchRect(colclip_rt ? colclip_rt : config.rt, GSVector4(config.drawarea) / GSVector4(rtsize).xyxy(),
			primid_texture, GSVector4(config.drawarea), m_date.primid_init_ps[static_cast<u8>(config.datm)].get(), nullptr, false);
	}
	else if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Stencil ||
			 (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && !multidraw_fb_copy))
		SetupDATE(colclip_rt ? colclip_rt : config.rt, config.ds, config.datm, config.drawarea);

	// The PrimID texture is normally recycled at the end of the draw. The upload failure
	// paths below return early, so release it there too rather than leaking it.
	const auto recycle_primid_on_abort = [&]() {
		if (primid_texture)
			Recycle(primid_texture);
	};

	if (config.vs.expand != GSHWDrawConfig::VSExpand::None)
	{
		if (!IASetExpandVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
		{
			Console.Error("D3D11: Failed to upload structured vertices (%u)", config.nverts);
			recycle_primid_on_abort();
			return;
		}

		// Pass the first uploaded vertex index to the vertex shader constants.
		config.cb_vs.max_depth.y = m_vertex.start;
	}
	else
	{
		if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
		{
			Console.Error("D3D11: Failed to upload vertices (%u)", config.nverts);
			recycle_primid_on_abort();
			return;
		}
	}

	if (config.vs.UseExpandIndexBuffer())
	{
		IASetIndexBuffer(m_expand_ib.get());
		m_index.start = 0;
		m_index.count = config.nindices;
	}
	else
	{
		if (!IASetIndexBuffer(config.indices, config.nindices))
		{
			Console.Error("D3D11: Failed to upload indices (%u)", config.nindices);
			recycle_primid_on_abort();
			return;
		}
	}

	D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
	switch (config.topology)
	{
		case GSHWDrawConfig::Topology::Point: topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; break;
		case GSHWDrawConfig::Topology::Line: topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; break;
		case GSHWDrawConfig::Topology::Triangle: topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
	}
	IASetPrimitiveTopology(topology);

	// Depth testing and sampling, bind resource as dsv read only and srv at the same time without the need of a copy.
	ID3D11DepthStencilView* read_only_dsv = nullptr;
	if (config.tex && config.tex == config.ds)
		read_only_dsv = static_cast<GSTexture11*>(config.ds)->ReadOnlyDepthStencilView();

	// Should be called before changing local srv state.
	PSUnbindConflictingSRVs(colclip_rt ? colclip_rt : config.rt, read_only_dsv ? nullptr : config.ds);

	if (config.tex)
	{
		CommitClear(config.tex);
		PSSetShaderResource(0, config.tex);
	}
	if (config.pal)
	{
		CommitClear(config.pal);
		PSSetShaderResource(1, config.pal);
	}

	SetupVS(config.vs, &config.cb_vs);
	SetupPS(config.ps, &config.cb_ps, config.sampler);

	if (primid_texture)
	{
		// First pass of PrimID tracking: draw into the PrimID texture with depth writes
		// disabled, then switch the pixel shader to date mode 3 and bind the result on slot 3.
		OMDepthStencilSelector dss = config.depth;
		dss.zwe = 0;
		const OMBlendSelector blend(GSHWDrawConfig::ColorMaskSelector(1),
			GSHWDrawConfig::BlendState(true, CONST_ONE, CONST_ONE, 3 /* MIN */, CONST_ONE, CONST_ZERO, false, 0));
		SetupOM(dss, blend, 0);
		OMSetRenderTargets(primid_texture, config.ds, &config.scissor, read_only_dsv);
		DrawIndexedPrimitive();

		config.ps.date = 3;
		config.alpha_second_pass.ps.date = 3;
		SetupPS(config.ps, nullptr, config.sampler);
		PSSetShaderResource(3, primid_texture);
	}
	else if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Depth)
	{
		// Bind the RT on slot 2 for sampling and retarget the draw.
		PSSetShaderResource(2, config.rt);
		config.rt = (config.rt == m_state.cached_rt_view) ? nullptr : m_state.cached_rt_view;
		OMSetRenderTargets(config.rt, config.ds, &config.scissor, read_only_dsv);
	}

	// Avoid changing framebuffer just to switch from rt+depth to rt and vice versa.
	GSTexture* draw_rt = colclip_rt ? colclip_rt : config.rt;
	GSTexture* draw_ds = config.ds;
	// Make sure no tex is bound as both rtv and srv at the same time.
	// All conflicts should've been taken care of by PSUnbindConflictingSRVs.
	// It is fine to do the optimization when on slot 0 tex is fb, tex is ds, and slot 2 sw blend as they are copies bound to srv.
	if (!draw_rt && draw_ds && m_state.rt_view && m_state.cached_rt_view && m_state.rt_view == *static_cast<GSTexture11*>(m_state.cached_rt_view) &&
		m_state.cached_dsv == draw_ds && config.tex != m_state.cached_rt_view && m_state.cached_rt_view->GetSize() == draw_ds->GetSize())
	{
		draw_rt = m_state.cached_rt_view;
	}
	else if (!draw_ds && draw_rt && m_state.dsv && m_state.cached_dsv && m_state.dsv == *static_cast<GSTexture11*>(m_state.cached_dsv) &&
			 m_state.cached_rt_view == draw_rt && config.tex != m_state.cached_dsv && m_state.cached_dsv->GetSize() == draw_rt->GetSize())
	{
		draw_ds = m_state.cached_dsv;
	}

	GSTexture* draw_rt_clone = nullptr;

	if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
	{
		// Requires a copy of the RT.
		// Used as "bind rt" flag when texture barrier is unsupported for tex is fb.
		draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true);
		if (!draw_rt_clone)
			Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
	}

	OMSetRenderTargets(draw_rt, draw_ds, &config.scissor, read_only_dsv);
	SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant);

	// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
	if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && multidraw_fb_copy)
		m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);

	SendHWDraw(config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);

	if (config.blend_multi_pass.enable)
	{
		// Second blend pass: reuse the same geometry with the multi-pass shader and blend settings.
		config.ps.no_color1 = config.blend_multi_pass.no_color1;
		config.ps.blend_hw = config.blend_multi_pass.blend_hw;
		config.ps.dither = config.blend_multi_pass.dither;
		SetupPS(config.ps, &config.cb_ps, config.sampler);
		SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend_multi_pass.blend), config.blend_multi_pass.blend.constant);
		DrawIndexedPrimitive();
	}

	if (config.alpha_second_pass.enable)
	{
		if (config.cb_ps.FogColor_AREF.a != config.alpha_second_pass.ps_aref)
		{
			config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref;
			SetupPS(config.alpha_second_pass.ps, &config.cb_ps, config.sampler);
		}
		else
		{
			// ps cbuffer hasn't changed, so don't bother checking
			SetupPS(config.alpha_second_pass.ps, nullptr, config.sampler);
		}

		SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
		SendHWDraw(config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
	}

	// Return the temporaries to the texture pool.
	if (draw_rt_clone)
		Recycle(draw_rt_clone);

	if (primid_texture)
		Recycle(primid_texture);

	if (colclip_rt)
	{
		// Grow the pending resolve area by this draw, and resolve now if the mode asks for it.
		config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);

		if (config.colclip_mode == GSHWDrawConfig::ColClipMode::ResolveOnly || config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertAndResolve)
		{
			const GSVector2i size = config.rt->GetSize();
			const GSVector4 dRect(config.colclip_update_area);
			const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
			StretchRect(colclip_rt, sRect, config.rt, dRect, ShaderConvert::COLCLIP_RESOLVE, false);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
			Recycle(colclip_rt);

			g_gs_device->SetColorClipTexture(nullptr);
		}
	}
}
// Issues the indexed draw(s) for a hardware draw, copying the render target first when needed.
//
//  config             - draw configuration (draw area, draw list, textures).
//  draw_rt_clone      - temporary texture receiving the RT copy, or nullptr when no copy is needed.
//  draw_rt            - render target being drawn to; source of the copy.
//  one_barrier        - the draw needs a single snapshot of the RT.
//  full_barrier       - the draw needs a fresh snapshot per entry of config.drawlist
//                       (only honoured when m_features.multidraw_fb_copy is set).
//  skip_first_barrier - reuse the snapshot already taken by a previous pass.
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
{
	if (draw_rt_clone)
	{
#ifdef PCSX2_DEVBUILD
		if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
			Console.Warning("D3D11: Possible unnecessary copy detected.");
#endif

		// Copies `drawarea` of the RT into the clone, then binds the clone on slot 2
		// (barrier reads) and/or slot 0 (texture is the RT).
		auto CopyAndBind = [&](GSVector4i drawarea) {
			CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
			if (one_barrier || full_barrier)
				PSSetShaderResource(2, draw_rt_clone);
			if (config.tex && config.tex == config.rt)
				PSSetShaderResource(0, draw_rt_clone);
		};

		if (m_features.multidraw_fb_copy && full_barrier)
		{
			// Check the draw list pointers before dereferencing them.
			pxAssert(config.drawlist && !config.drawlist->empty());
			const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
			pxAssert(config.drawlist_bbox && static_cast<u32>(config.drawlist_bbox->size()) == draw_list_size);

			const u32 indices_per_prim = config.indices_per_prim;

			// `p` is the running index offset of the current sub-draw.
			for (u32 n = 0, p = 0; n < draw_list_size; n++)
			{
				const u32 count = (*config.drawlist)[n] * indices_per_prim;

				GSVector4i bbox = (*config.drawlist_bbox)[n].rintersect(config.drawarea);

				// Copy only the part needed by the draw.
				CopyAndBind(bbox);

				DrawIndexedPrimitive(p, count);
				p += count;
			}

			return;
		}

		// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
		if (!skip_first_barrier)
			CopyAndBind(config.drawarea);
	}

	DrawIndexedPrimitive();
}