mirror of
https://github.com/PCSX2/pcsx2.git
synced 2026-01-31 01:15:24 +01:00
2881 lines
87 KiB
C++
2881 lines
87 KiB
C++
// SPDX-FileCopyrightText: 2002-2026 PCSX2 Dev Team
|
|
// SPDX-License-Identifier: GPL-3.0+
|
|
|
|
#include "GS.h"
|
|
#include "GS/GSGL.h"
|
|
#include "GSDevice11.h"
|
|
#include "GS/Renderers/DX11/D3D.h"
|
|
#include "GS/GSExtra.h"
|
|
#include "GS/GSPerfMon.h"
|
|
#include "GS/GSUtil.h"
|
|
#include "Host.h"
|
|
|
|
#include "common/BitUtils.h"
|
|
#include "common/Error.h"
|
|
#include "common/Path.h"
|
|
#include "common/StringUtil.h"
|
|
|
|
#include "imgui.h"
|
|
#include "IconsFontAwesome.h"
|
|
|
|
#include <bit>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <VersionHelpers.h>
|
|
#include <d3dcompiler.h>
|
|
#include <dxgidebug.h>
|
|
|
|
// #define REPORT_LEAKED_OBJECTS 1
|
|
|
|
// Colour passed to ClearRenderTargetView() for the swap chain back buffer.
// Value-initialised, so all four components are zero.
static constexpr std::array<float, 4> s_present_clear_color = {};
|
|
|
|
// Reports whether the device advertises 2D texture support for `format`.
// A failed CheckFormatSupport() query is treated as "not supported".
static bool SupportsTextureFormat(ID3D11Device* dev, DXGI_FORMAT format)
{
	UINT format_caps = 0;
	const HRESULT query_hr = dev->CheckFormatSupport(format, &format_caps);
	if (SUCCEEDED(query_hr))
		return (format_caps & D3D11_FORMAT_SUPPORT_TEXTURE2D) != 0;

	return false;
}
|
|
|
|
// Resets the cached pipeline state and fills in the default feature flags.
// Several flags (dxt/bptc textures, vs_expand, cas_sharpening) are recomputed by
// SetFeatures() once a device exists.
GSDevice11::GSDevice11()
{
	// Zero the whole state cache, then patch the fields whose "unset" value is not zero.
	memset(&m_state, 0, sizeof(m_state));
	m_state.bf = -1;
	m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;

	// Features enabled by default.
	m_features.primitive_id = true;
	m_features.stencil_buffer = true;
	m_features.cas_sharpening = true;
	m_features.test_and_sample_depth = true;

	// Features disabled by default.
	m_features.texture_barrier = false;
	m_features.provoking_vertex_last = false;
	m_features.point_expand = false;
	m_features.line_expand = false;
	m_features.prefer_new_textures = false;
	m_features.dxt_textures = false;
	m_features.bptc_textures = false;
	m_features.framebuffer_fetch = false;

	// Driven by user configuration.
	m_features.multidraw_fb_copy = (GSConfig.OverrideTextureBarriers != 0);
}
|
|
|
|
// Defaulted destructor; explicit teardown of device objects is done in Destroy().
GSDevice11::~GSDevice11() = default;
|
|
|
|
// Attaches a debug name to a D3D11 object. Only does anything in PCSX2_DEVBUILD builds.
// An object that already has a non-empty name keeps it.
void GSDevice11::SetD3DDebugObjectName(ID3D11DeviceChild* obj, std::string_view name)
{
#ifdef PCSX2_DEVBUILD
	// WKPDID_D3DDebugObjectName
	static constexpr GUID debug_name_guid = {0x429b8c22, 0x9188, 0x4b0c, {0x87, 0x42, 0xac, 0xb0, 0xbf, 0x85, 0xc2, 0x00}};

	// Ask only for the size of any existing name (null data pointer).
	UINT current_name_size = 0;
	const HRESULT query_hr = obj->GetPrivateData(debug_name_guid, &current_name_size, nullptr);
	const bool already_named = SUCCEEDED(query_hr) && current_name_size > 0;

	if (!already_named)
		obj->SetPrivateData(debug_name_guid, static_cast<UINT>(name.length()), name.data());
#endif
}
|
|
|
|
// Identifies this backend as the Direct3D 11 renderer.
RenderAPI GSDevice11::GetRenderAPI() const
{
	constexpr RenderAPI backend_api = RenderAPI::D3D11;
	return backend_api;
}
|
|
|
|
// Creates the D3D11 device, the swap chain (unless surfaceless) and every fixed
// resource the renderer needs: shaders, constant buffers, state objects,
// vertex/index buffers and samplers.
//
// Returns false when a required step fails: device creation, window/swap chain
// setup, shader loading, or creation of the vertex/index/expand buffers and the
// VS/PS constant buffers. Failures creating the smaller constant buffers and the
// state objects are logged but remain non-fatal, as they were before.
bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
{
	if (!GSDevice::Create(vsync_mode, allow_present_throttle))
		return false;

	// Logs a failed creation call whose result is deliberately not fatal.
	const auto log_if_failed = [](HRESULT result, const char* what) {
		if (FAILED(result))
			Console.Error("D3D11: Failed to create %s: 0x%08X", what, static_cast<unsigned>(result));
	};

	UINT create_flags = 0;
	if (GSConfig.UseDebugDevice)
		create_flags |= D3D11_CREATE_DEVICE_DEBUG;

	m_dxgi_factory = D3D::CreateFactory(GSConfig.UseDebugDevice);
	if (!m_dxgi_factory)
		return false;

	wil::com_ptr_nothrow<IDXGIAdapter1> dxgi_adapter = D3D::GetAdapterByName(m_dxgi_factory.get(), GSConfig.Adapter);

	static constexpr std::array<D3D_FEATURE_LEVEL, 2> requested_feature_levels = {{
		D3D_FEATURE_LEVEL_11_0,
		D3D_FEATURE_LEVEL_10_0,
	}};

	wil::com_ptr_nothrow<ID3D11Device> temp_dev;
	wil::com_ptr_nothrow<ID3D11DeviceContext> temp_ctx;

	// With an explicit adapter the driver type must be UNKNOWN; otherwise use the default hardware adapter.
	HRESULT hr =
		D3D11CreateDevice(dxgi_adapter.get(), dxgi_adapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE,
			nullptr, create_flags, requested_feature_levels.data(), static_cast<UINT>(requested_feature_levels.size()),
			D3D11_SDK_VERSION, temp_dev.put(), &m_feature_level, temp_ctx.put());

	if (FAILED(hr) || !temp_dev.try_query_to(&m_dev) || !temp_ctx.try_query_to(&m_ctx))
	{
		// HRESULT is signed; format it as unsigned so failure codes show as 0x8xxxxxxx
		// rather than as a negative hex number.
		Host::ReportErrorAsync("GS",
			fmt::format(
				TRANSLATE_FS("GS", "Failed to create D3D11 device: 0x{:08X}. A GPU which supports Direct3D Feature Level 10.0 is required."),
				static_cast<u32>(hr)));
		return false;
	}

	// we re-grab these later, see below
	dxgi_adapter.reset();
	temp_dev.reset();
	temp_ctx.reset();

	if (GSConfig.UseDebugDevice && IsDebuggerPresent())
	{
		wil::com_ptr_nothrow<ID3D11InfoQueue> info;
		if (m_dev.try_query_to(&info))
		{
			info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_ERROR, TRUE);
			info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_WARNING, TRUE);

			// Silence some annoying harmless warnings.
			D3D11_MESSAGE_ID hide[] = {
				D3D11_MESSAGE_ID_DEVICE_OMSETRENDERTARGETS_HAZARD,
				D3D11_MESSAGE_ID_DEVICE_PSSETSHADERRESOURCES_HAZARD,
				D3D11_MESSAGE_ID_DEVICE_DRAW_RENDERTARGETVIEW_NOT_SET,
				D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS,
			};

			D3D11_INFO_QUEUE_FILTER filter = {};
			filter.DenyList.NumIDs = static_cast<UINT>(std::size(hide));
			filter.DenyList.pIDList = hide;
			info->AddStorageFilterEntries(&filter);
		}
	}

	// Re-acquire the adapter the device was actually created on.
	wil::com_ptr_nothrow<IDXGIDevice> dxgi_device;
	if (m_dev.try_query_to(&dxgi_device) && SUCCEEDED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.put()))))
	{
		m_name = D3D::GetAdapterName(dxgi_adapter.get());
		Console.WriteLn(fmt::format("D3D11: Adapter: {}", m_name));
	}
	else
		Console.Error("D3D11: Failed to obtain adapter name.");

	BOOL allow_tearing_supported = false;
	hr = m_dxgi_factory->CheckFeatureSupport(
		DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported));
	m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);

	if (!AcquireWindow(true) || (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain()))
		return false;

	D3D11_BUFFER_DESC bd = {};
	D3D11_SAMPLER_DESC sd = {};
	D3D11_DEPTH_STENCIL_DESC dsd = {};
	D3D11_RASTERIZER_DESC rd = {};
	D3D11_BLEND_DESC bsd = {};

	if (GSConfig.UseDebugDevice)
		m_annotation = m_ctx.try_query<ID3DUserDefinedAnnotation>();

	if (!m_shader_cache.Open(m_feature_level, GSConfig.UseDebugDevice))
		Console.Warning("D3D11: Shader cache failed to open.");

	// HACK: check AMD
	// Broken point sampler should be enabled only on AMD.
	// Reuses the adapter obtained above rather than querying the device a second time.
	if (dxgi_adapter)
		m_features.broken_point_sampler = (D3D::GetVendorID(dxgi_adapter.get()) == D3D::VendorID::AMD);

	SetFeatures(dxgi_adapter.get());

	std::optional<std::string> shader = ReadShaderSource("shaders/dx11/tfx.fx");
	if (!shader.has_value())
		return false;
	m_tfx_source = std::move(*shader);

	// convert

	D3D11_INPUT_ELEMENT_DESC il_convert[] =
	{
		{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
		{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
		{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
	};

	const std::optional<std::string> convert_hlsl = ReadShaderSource("shaders/dx11/convert.fx");
	if (!convert_hlsl.has_value())
		return false;
	if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_convert.vs.put(), m_convert.il.put(),
			il_convert, std::size(il_convert), *convert_hlsl, nullptr, "vs_main"))
	{
		return false;
	}

	for (size_t i = 0; i < std::size(m_convert.ps); i++)
	{
		m_convert.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, nullptr, shaderName(static_cast<ShaderConvert>(i)));
		if (!m_convert.ps[i])
			return false;
	}

	shader = ReadShaderSource("shaders/dx11/present.fx");
	if (!shader.has_value())
		return false;
	if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_present.vs.put(), m_present.il.put(),
			il_convert, std::size(il_convert), *shader, nullptr, "vs_main"))
	{
		return false;
	}

	for (size_t i = 0; i < std::size(m_present.ps); i++)
	{
		m_present.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, shaderName(static_cast<PresentShader>(i)));
		if (!m_present.ps[i])
			return false;
	}

	bd = {};
	bd.ByteWidth = sizeof(DisplayConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	log_if_failed(m_dev->CreateBuffer(&bd, nullptr, m_present.ps_cb.put()), "present constant buffer");

	dsd = {};
	log_if_failed(m_dev->CreateDepthStencilState(&dsd, m_convert.dss.put()), "convert depth-stencil state");

	dsd.DepthEnable = true;
	dsd.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
	dsd.DepthFunc = D3D11_COMPARISON_ALWAYS;
	log_if_failed(m_dev->CreateDepthStencilState(&dsd, m_convert.dss_write.put()), "convert depth-write state");

	// One blend state per colour write mask value.
	bsd = {};
	for (u32 i = 0; i < static_cast<u32>(m_convert.bs.size()); i++)
	{
		bsd.RenderTarget[0].RenderTargetWriteMask = static_cast<u8>(i);
		log_if_failed(m_dev->CreateBlendState(&bsd, m_convert.bs[i].put()), "convert blend state");
	}

	// merge

	bd = {};
	bd.ByteWidth = sizeof(MergeConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	log_if_failed(m_dev->CreateBuffer(&bd, nullptr, m_merge.cb.put()), "merge constant buffer");

	shader = ReadShaderSource("shaders/dx11/merge.fx");
	if (!shader.has_value())
		return false;

	for (size_t i = 0; i < std::size(m_merge.ps); i++)
	{
		const std::string entry_point(StringUtil::StdStringFromFormat("ps_main%zu", i));
		m_merge.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, entry_point.c_str());
		if (!m_merge.ps[i])
			return false;
	}

	bsd = {};
	bsd.RenderTarget[0].BlendEnable = true;
	bsd.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
	bsd.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
	bsd.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
	bsd.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
	bsd.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
	bsd.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
	bsd.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
	log_if_failed(m_dev->CreateBlendState(&bsd, m_merge.bs.put()), "merge blend state");

	// interlace

	bd = {};
	bd.ByteWidth = sizeof(InterlaceConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	log_if_failed(m_dev->CreateBuffer(&bd, nullptr, m_interlace.cb.put()), "interlace constant buffer");

	shader = ReadShaderSource("shaders/dx11/interlace.fx");
	if (!shader.has_value())
		return false;
	for (size_t i = 0; i < std::size(m_interlace.ps); i++)
	{
		const std::string entry_point(StringUtil::StdStringFromFormat("ps_main%zu", i));
		m_interlace.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, entry_point.c_str());
		if (!m_interlace.ps[i])
			return false;
	}

	// Shade Boost

	bd = {};
	bd.ByteWidth = sizeof(float) * 4;
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	log_if_failed(m_dev->CreateBuffer(&bd, nullptr, m_shadeboost.cb.put()), "shade boost constant buffer");

	shader = ReadShaderSource("shaders/dx11/shadeboost.fx");
	if (!shader.has_value())
		return false;
	m_shadeboost.ps = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, "ps_main");
	if (!m_shadeboost.ps)
		return false;

	// Vertex/Index Buffer

	bd = {};
	bd.ByteWidth = VERTEX_BUFFER_SIZE;
	bd.Usage = D3D11_USAGE_DYNAMIC;
	bd.BindFlags = D3D11_BIND_VERTEX_BUFFER;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_vb.put())))
	{
		Console.Error("D3D11: Failed to create vertex buffer.");
		return false;
	}

	bd.ByteWidth = INDEX_BUFFER_SIZE;
	bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
	if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_ib.put())))
	{
		Console.Error("D3D11: Failed to create index buffer.");
		return false;
	}
	IASetIndexBuffer(m_ib.get());

	if (m_features.vs_expand)
	{
		// Structured buffer that the vertex shader reads through an SRV.
		bd.ByteWidth = VERTEX_BUFFER_SIZE;
		bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
		bd.StructureByteStride = sizeof(GSVertex);
		bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;

		if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_expand_vb.put())))
		{
			Console.Error("D3D11: Failed to create expand vertex buffer.");
			return false;
		}

		const CD3D11_SHADER_RESOURCE_VIEW_DESC vb_srv_desc(
			D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, VERTEX_BUFFER_SIZE / sizeof(GSVertex));
		if (FAILED(m_dev->CreateShaderResourceView(m_expand_vb.get(), &vb_srv_desc, m_expand_vb_srv.put())))
		{
			Console.Error("D3D11: Failed to create expand vertex buffer SRV.");
			return false;
		}

		m_ctx->VSSetShaderResources(0, 1, m_expand_vb_srv.addressof());

		// Index buffer filled once with generated expansion indices.
		bd.ByteWidth = EXPAND_BUFFER_SIZE;
		bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
		bd.StructureByteStride = 0;
		bd.MiscFlags = 0;

		std::unique_ptr<u8[]> expand_data = std::make_unique<u8[]>(EXPAND_BUFFER_SIZE);
		GenerateExpansionIndexBuffer(expand_data.get());

		const D3D11_SUBRESOURCE_DATA srd = {expand_data.get()};
		if (FAILED(m_dev->CreateBuffer(&bd, &srd, m_expand_ib.put())))
		{
			Console.Error("D3D11: Failed to create expand index buffer.");
			return false;
		}
	}

	// rasterizer

	rd = {};
	rd.FillMode = D3D11_FILL_SOLID;
	rd.CullMode = D3D11_CULL_NONE;
	rd.FrontCounterClockwise = false;
	rd.DepthBias = 0;
	rd.DepthBiasClamp = 0;
	rd.SlopeScaledDepthBias = 0;
	rd.DepthClipEnable = false; // ???
	rd.ScissorEnable = true;
	rd.MultisampleEnable = false;
	rd.AntialiasedLineEnable = false;

	log_if_failed(m_dev->CreateRasterizerState(&rd, m_rs.put()), "rasterizer state");
	m_ctx->RSSetState(m_rs.get());

	// sampler

	sd = {};
	sd.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
	sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
	sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
	sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
	sd.MinLOD = -FLT_MAX;
	sd.MaxLOD = FLT_MAX;
	sd.MaxAnisotropy = 1;
	sd.ComparisonFunc = D3D11_COMPARISON_NEVER;
	log_if_failed(m_dev->CreateSamplerState(&sd, m_convert.ln.put()), "linear sampler state");

	sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
	log_if_failed(m_dev->CreateSamplerState(&sd, m_convert.pt.put()), "point sampler state");

	// constant buffer

	bd = {};
	bd.ByteWidth = sizeof(GSHWDrawConfig::VSConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_vs_cb.put())))
	{
		Console.Error("D3D11: Failed to create vertex shader constant buffer.");
		return false;
	}

	bd = {};
	bd.ByteWidth = sizeof(GSHWDrawConfig::PSConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	if (FAILED(m_dev->CreateBuffer(&bd, nullptr, m_ps_cb.put())))
	{
		Console.Error("D3D11: Failed to create pixel shader constant buffer.");
		return false;
	}

	// create layout

	{
		const VSSelector sel;
		const GSHWDrawConfig::VSConstantBuffer cb;

		SetupVS(sel, &cb);
	}

	// depth stencil

	dsd = {};
	dsd.DepthEnable = false;
	dsd.StencilEnable = true;
	dsd.StencilReadMask = 1;
	dsd.StencilWriteMask = 1;
	dsd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
	dsd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE;
	dsd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
	dsd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
	dsd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
	dsd.BackFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE;
	dsd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP;
	dsd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;
	log_if_failed(m_dev->CreateDepthStencilState(&dsd, m_date.dss.put()), "DATE depth-stencil state");

	// blend

	{
		D3D11_BLEND_DESC blend = {};
		log_if_failed(m_dev->CreateBlendState(&blend, m_date.bs.put()), "DATE blend state");
	}

	for (size_t i = 0; i < std::size(m_date.primid_init_ps); i++)
	{
		const std::string entry_point(StringUtil::StdStringFromFormat("ps_stencil_image_init_%zu", i));
		m_date.primid_init_ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, nullptr, entry_point.c_str());
		if (!m_date.primid_init_ps[i])
			return false;
	}

	if (m_features.cas_sharpening && !CreateCASShaders())
		return false;

	if (!CreateImGuiResources())
		return false;

	if (m_feature_level < D3D_FEATURE_LEVEL_11_0)
	{
		Host::AddIconOSDMessage("d3d11_feature_level_warning", ICON_FA_TRIANGLE_EXCLAMATION,
			TRANSLATE_SV("GS", "The Direct3D 11 renderer is running at feature level 10.0. This is an UNSUPPORTED configuration.\n"
							   "Do not request support, please upgrade your hardware/drivers first."),
			Host::OSD_WARNING_DURATION);
	}

	return true;
}
|
|
|
|
void GSDevice11::Destroy()
|
|
{
|
|
GSDevice::Destroy();
|
|
DestroySwapChain();
|
|
DestroyTimestampQueries();
|
|
|
|
m_convert = {};
|
|
m_present = {};
|
|
m_merge = {};
|
|
m_interlace = {};
|
|
m_shadeboost = {};
|
|
m_date = {};
|
|
m_cas = {};
|
|
m_imgui = {};
|
|
|
|
m_vb.reset();
|
|
m_ib.reset();
|
|
m_expand_vb_srv.reset();
|
|
m_expand_vb.reset();
|
|
m_expand_ib.reset();
|
|
|
|
m_vs.clear();
|
|
m_vs_cb.reset();
|
|
m_gs.clear();
|
|
m_ps.clear();
|
|
m_ps_cb.reset();
|
|
m_ps_ss.clear();
|
|
m_om_dss.clear();
|
|
m_om_bs.clear();
|
|
m_rs.reset();
|
|
|
|
if (m_state.rt_view)
|
|
{
|
|
m_state.rt_view->Release();
|
|
m_state.rt_view = nullptr;
|
|
}
|
|
m_state.cached_rt_view = nullptr;
|
|
|
|
if (m_state.dsv)
|
|
{
|
|
m_state.dsv->Release();
|
|
m_state.dsv = nullptr;
|
|
}
|
|
m_state.cached_dsv = nullptr;
|
|
|
|
m_shader_cache.Close();
|
|
|
|
#ifdef REPORT_LEAKED_OBJECTS
|
|
wil::com_ptr_nothrow<ID3D11Debug> debug;
|
|
m_dev.try_query_to(&debug);
|
|
#endif
|
|
|
|
m_annotation.reset();
|
|
m_ctx.reset();
|
|
m_dev.reset();
|
|
m_dxgi_factory.reset();
|
|
|
|
#ifdef REPORT_LEAKED_OBJECTS
|
|
if (debug)
|
|
debug->ReportLiveDeviceObjects(D3D11_RLDO_DETAIL | D3D11_RLDO_IGNORE_INTERNAL);
|
|
#endif
|
|
}
|
|
|
|
// Fills in the feature flags and limits that depend on the created device and
// its feature level. The `adapter` argument is not used by this implementation.
void GSDevice11::SetFeatures(IDXGIAdapter1* adapter)
{
	const bool has_feature_level_11 = (m_feature_level >= D3D_FEATURE_LEVEL_11_0);

	// Check all three formats, since the feature means any can be used.
	bool dxt_supported = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC1_UNORM);
	dxt_supported = dxt_supported && SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC2_UNORM);
	dxt_supported = dxt_supported && SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC3_UNORM);
	m_features.dxt_textures = dxt_supported;

	m_features.bptc_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC7_UNORM);

	// Both of these need feature level 11.0; vertex shader expansion can also be turned off by config.
	m_features.vs_expand = (has_feature_level_11 && !GSConfig.DisableVertexShaderExpand);
	m_features.cas_sharpening = has_feature_level_11;

	if (has_feature_level_11)
		m_max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
	else
		m_max_texture_size = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
}
|
|
|
|
bool GSDevice11::HasSurface() const
|
|
{
|
|
return static_cast<bool>(m_swap_chain);
|
|
}
|
|
|
|
// Applies a new vsync mode and present-throttle setting. The swap chain is
// rebuilt only when the new mode needs a different number of back buffers.
void GSDevice11::SetVSyncMode(GSVSyncMode mode, bool allow_present_throttle)
{
	m_allow_present_throttle = allow_present_throttle;

	// Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
	const bool downgrade_to_fifo = (m_is_exclusive_fullscreen && mode == GSVSyncMode::Mailbox);
	if (downgrade_to_fifo)
		WARNING_LOG("D3D11: Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");

	const GSVSyncMode effective_mode = downgrade_to_fifo ? GSVSyncMode::FIFO : mode;
	if (effective_mode == m_vsync_mode)
		return;

	// The buffer count is derived from m_vsync_mode, so sample it on both sides of the change.
	const u32 previous_buffer_count = GetSwapChainBufferCount();
	m_vsync_mode = effective_mode;

	if (!m_swap_chain || GetSwapChainBufferCount() == previous_buffer_count)
		return;

	DestroySwapChain();
	if (!CreateSwapChain())
		pxFailRel("D3D11: Failed to recreate swap chain after vsync change.");
}
|
|
|
|
// Number of back buffers to request for the current vsync mode.
u32 GSDevice11::GetSwapChainBufferCount() const
{
	// With triple buffering (mailbox), we need three buffers.
	if (m_vsync_mode == GSVSyncMode::Mailbox)
		return 3;

	// With vsync off, we only need two buffers. Same for blocking vsync.
	return 2;
}
|
|
|
|
bool GSDevice11::CreateSwapChain()
|
|
{
|
|
constexpr DXGI_FORMAT swap_chain_format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
|
|
|
if (m_window_info.type != WindowInfo::Type::Win32)
|
|
return false;
|
|
|
|
const HWND window_hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
|
|
RECT client_rc{};
|
|
GetClientRect(window_hwnd, &client_rc);
|
|
|
|
DXGI_MODE_DESC fullscreen_mode;
|
|
wil::com_ptr_nothrow<IDXGIOutput> fullscreen_output;
|
|
if (Host::IsFullscreen())
|
|
{
|
|
u32 fullscreen_width, fullscreen_height;
|
|
float fullscreen_refresh_rate;
|
|
m_is_exclusive_fullscreen =
|
|
GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) &&
|
|
D3D::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.get(), window_hwnd, fullscreen_width,
|
|
fullscreen_height, fullscreen_refresh_rate, swap_chain_format, &fullscreen_mode,
|
|
fullscreen_output.put());
|
|
|
|
// Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
|
|
if (m_vsync_mode == GSVSyncMode::Mailbox && m_is_exclusive_fullscreen)
|
|
{
|
|
WARNING_LOG("D3D11: Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
|
|
m_vsync_mode = GSVSyncMode::FIFO;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
m_is_exclusive_fullscreen = false;
|
|
}
|
|
|
|
m_using_flip_model_swap_chain = !GSConfig.UseBlitSwapChain || m_is_exclusive_fullscreen;
|
|
|
|
DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
|
|
swap_chain_desc.Width = static_cast<u32>(client_rc.right - client_rc.left);
|
|
swap_chain_desc.Height = static_cast<u32>(client_rc.bottom - client_rc.top);
|
|
swap_chain_desc.Format = swap_chain_format;
|
|
swap_chain_desc.SampleDesc.Count = 1;
|
|
swap_chain_desc.BufferCount = GetSwapChainBufferCount();
|
|
swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
|
swap_chain_desc.SwapEffect =
|
|
m_using_flip_model_swap_chain ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_DISCARD;
|
|
|
|
m_using_allow_tearing = (m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen);
|
|
if (m_using_allow_tearing)
|
|
swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
if (m_is_exclusive_fullscreen)
|
|
{
|
|
DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc;
|
|
DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {};
|
|
|
|
fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
|
|
fs_sd_desc.Width = fullscreen_mode.Width;
|
|
fs_sd_desc.Height = fullscreen_mode.Height;
|
|
fs_desc.RefreshRate = fullscreen_mode.RefreshRate;
|
|
fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering;
|
|
fs_desc.Scaling = fullscreen_mode.Scaling;
|
|
fs_desc.Windowed = FALSE;
|
|
|
|
Console.WriteLn("D3D11: Creating a %dx%d exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height);
|
|
hr = m_dxgi_factory->CreateSwapChainForHwnd(
|
|
m_dev.get(), window_hwnd, &fs_sd_desc, &fs_desc, fullscreen_output.get(), m_swap_chain.put());
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Warning("D3D11: Failed to create fullscreen swap chain, trying windowed.");
|
|
m_is_exclusive_fullscreen = false;
|
|
m_using_allow_tearing = m_allow_tearing_supported && m_using_flip_model_swap_chain;
|
|
}
|
|
}
|
|
|
|
if (!m_is_exclusive_fullscreen)
|
|
{
|
|
Console.WriteLn("D3D11: Creating a %dx%d %s windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height,
|
|
m_using_flip_model_swap_chain ? "flip-discard" : "discard");
|
|
hr = m_dxgi_factory->CreateSwapChainForHwnd(
|
|
m_dev.get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, m_swap_chain.put());
|
|
}
|
|
|
|
if (FAILED(hr) && m_using_flip_model_swap_chain)
|
|
{
|
|
Console.Warning("D3D11: Failed to create a flip-discard swap chain, trying discard.");
|
|
swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
|
|
swap_chain_desc.Flags = 0;
|
|
m_using_flip_model_swap_chain = false;
|
|
m_using_allow_tearing = false;
|
|
|
|
hr = m_dxgi_factory->CreateSwapChainForHwnd(
|
|
m_dev.get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, m_swap_chain.put());
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D11: CreateSwapChainForHwnd failed: 0x%08X", hr);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// MWA needs to be called on the correct factory.
|
|
wil::com_ptr_nothrow<IDXGIFactory> swap_chain_factory;
|
|
hr = m_swap_chain->GetParent(IID_PPV_ARGS(swap_chain_factory.put()));
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = swap_chain_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
|
|
if (FAILED(hr))
|
|
Console.ErrorFmt("D3D11: MakeWindowAssociation() to disable ALT+ENTER failed: {}", Error::CreateHResult(hr).GetDescription());
|
|
}
|
|
else
|
|
{
|
|
Console.ErrorFmt("D3D11: GetParent() on swap chain to get factory failed: {}", Error::CreateHResult(hr).GetDescription());
|
|
}
|
|
|
|
if (!CreateSwapChainRTV())
|
|
{
|
|
DestroySwapChain();
|
|
return false;
|
|
}
|
|
|
|
// Render a frame as soon as possible to clear out whatever was previously being displayed.
|
|
m_ctx->ClearRenderTargetView(m_swap_chain_rtv.get(), s_present_clear_color.data());
|
|
m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice11::CreateSwapChainRTV()
|
|
{
|
|
wil::com_ptr_nothrow<ID3D11Texture2D> backbuffer;
|
|
HRESULT hr = m_swap_chain->GetBuffer(0, IID_PPV_ARGS(backbuffer.put()));
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D11: GetBuffer for RTV failed: 0x%08X", hr);
|
|
return false;
|
|
}
|
|
|
|
D3D11_TEXTURE2D_DESC backbuffer_desc;
|
|
backbuffer->GetDesc(&backbuffer_desc);
|
|
|
|
CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(
|
|
D3D11_RTV_DIMENSION_TEXTURE2D, backbuffer_desc.Format, 0, 0, backbuffer_desc.ArraySize);
|
|
hr = m_dev->CreateRenderTargetView(backbuffer.get(), &rtv_desc, m_swap_chain_rtv.put());
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D11: CreateRenderTargetView for swap chain failed: 0x%08X", hr);
|
|
m_swap_chain_rtv.reset();
|
|
return false;
|
|
}
|
|
|
|
m_window_info.surface_width = backbuffer_desc.Width;
|
|
m_window_info.surface_height = backbuffer_desc.Height;
|
|
DevCon.WriteLn("D3D11: Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height);
|
|
|
|
if (m_window_info.type == WindowInfo::Type::Win32)
|
|
{
|
|
BOOL fullscreen = FALSE;
|
|
DXGI_SWAP_CHAIN_DESC desc;
|
|
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen &&
|
|
SUCCEEDED(m_swap_chain->GetDesc(&desc)))
|
|
{
|
|
m_window_info.surface_refresh_rate = static_cast<float>(desc.BufferDesc.RefreshRate.Numerator) /
|
|
static_cast<float>(desc.BufferDesc.RefreshRate.Denominator);
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void GSDevice11::DestroySwapChain()
|
|
{
|
|
if (!m_swap_chain)
|
|
return;
|
|
|
|
m_swap_chain_rtv.reset();
|
|
|
|
// switch out of fullscreen before destroying
|
|
BOOL is_fullscreen;
|
|
if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen)
|
|
m_swap_chain->SetFullscreenState(FALSE, nullptr);
|
|
|
|
m_swap_chain.reset();
|
|
m_is_exclusive_fullscreen = false;
|
|
}
|
|
|
|
bool GSDevice11::UpdateWindow()
|
|
{
|
|
DestroySwapChain();
|
|
|
|
if (!AcquireWindow(false))
|
|
return false;
|
|
|
|
if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain())
|
|
{
|
|
Console.WriteLn("D3D11: Failed to create swap chain on updated window");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void GSDevice11::DestroySurface()
|
|
{
|
|
DestroySwapChain();
|
|
}
|
|
|
|
std::string GSDevice11::GetDriverInfo() const
|
|
{
|
|
std::string ret = "Unknown Feature Level";
|
|
|
|
static constexpr std::array<std::tuple<D3D_FEATURE_LEVEL, const char*>, 2> feature_level_names = {{
|
|
{D3D_FEATURE_LEVEL_10_0, "D3D_FEATURE_LEVEL_10_0"},
|
|
{D3D_FEATURE_LEVEL_11_0, "D3D_FEATURE_LEVEL_11_0"},
|
|
}};
|
|
|
|
for (size_t i = 0; i < std::size(feature_level_names); i++)
|
|
{
|
|
if (m_feature_level == std::get<0>(feature_level_names[i]))
|
|
{
|
|
ret = std::get<1>(feature_level_names[i]);
|
|
break;
|
|
}
|
|
}
|
|
|
|
ret += "\n";
|
|
|
|
wil::com_ptr_nothrow<IDXGIDevice> dxgi_dev;
|
|
if (m_dev.try_query_to(&dxgi_dev))
|
|
{
|
|
wil::com_ptr_nothrow<IDXGIAdapter> dxgi_adapter;
|
|
if (SUCCEEDED(dxgi_dev->GetAdapter(dxgi_adapter.put())))
|
|
{
|
|
DXGI_ADAPTER_DESC desc;
|
|
if (SUCCEEDED(dxgi_adapter->GetDesc(&desc)))
|
|
{
|
|
ret += StringUtil::StdStringFromFormat("VID: 0x%04X PID: 0x%04X\n", desc.VendorId, desc.DeviceId);
|
|
ret += StringUtil::WideStringToUTF8String(desc.Description);
|
|
ret += "\n";
|
|
|
|
const std::string driver_version(D3D::GetDriverVersionFromLUID(desc.AdapterLuid));
|
|
if (!driver_version.empty())
|
|
{
|
|
ret += "Driver Version: ";
|
|
ret += driver_version;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Handles a window resize. Ignored without a swap chain or in exclusive
// fullscreen. The surface scale is always stored; the swap chain buffers and RTV
// are rebuilt only when the size actually changed.
void GSDevice11::ResizeWindow(u32 new_window_width, u32 new_window_height, float new_window_scale)
{
	if (!m_swap_chain || m_is_exclusive_fullscreen)
		return;

	m_window_info.surface_scale = new_window_scale;

	const bool same_width = (m_window_info.surface_width == new_window_width);
	const bool same_height = (m_window_info.surface_height == new_window_height);
	if (same_width && same_height)
		return;

	// The RTV references the back buffer, so release it before resizing.
	m_swap_chain_rtv.reset();

	const UINT resize_flags = m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0;
	const HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, resize_flags);
	if (FAILED(hr))
		Console.Error("D3D11: ResizeBuffers() failed: 0x%08X", hr);

	if (!CreateSwapChainRTV())
		pxFailRel("Failed to recreate swap chain RTV after resize");
}
|
|
|
|
bool GSDevice11::SupportsExclusiveFullscreen() const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// Prepares the back buffer for presenting: clears it, binds it as the only render
// target and sets viewport and scissor to the window size.
// Returns FrameSkipped when asked to skip, when there is no swap chain, or when
// exclusive fullscreen has been lost.
GSDevice::PresentResult GSDevice11::BeginPresent(bool frame_skip)
{
	if (frame_skip || !m_swap_chain)
		return PresentResult::FrameSkipped;

	// Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
	// This might get called repeatedly if it takes a while to switch back, that's the host's problem.
	if (m_is_exclusive_fullscreen)
	{
		BOOL still_fullscreen = FALSE;
		const HRESULT state_hr = m_swap_chain->GetFullscreenState(&still_fullscreen, nullptr);
		if (FAILED(state_hr) || !still_fullscreen)
		{
			Host::RunOnCPUThread([]() { Host::SetFullscreen(false); });
			return PresentResult::FrameSkipped;
		}
	}

	// When using vsync, the time here seems to include the time for the buffer to become available.
	// This blows up our GPU usage number considerably, so read the timestamp before the final blit
	// in this configuration. It does reduce accuracy a little, but better than seeing 100% all of
	// the time, when it's more like a couple of percent.
	if (m_gpu_timing_enabled && m_vsync_mode == GSVSyncMode::FIFO)
		PopTimestampQuery();

	m_ctx->ClearRenderTargetView(m_swap_chain_rtv.get(), s_present_clear_color.data());
	m_ctx->OMSetRenderTargets(1, m_swap_chain_rtv.addressof(), nullptr);

	// Mirror the binding in the state cache: swap the held RTV reference for the back buffer's.
	if (m_state.rt_view)
		m_state.rt_view->Release();
	m_state.rt_view = m_swap_chain_rtv.get();
	m_state.rt_view->AddRef();
	m_state.cached_rt_view = nullptr;

	// No depth buffer is bound while presenting.
	if (m_state.dsv)
	{
		m_state.dsv->Release();
		m_state.dsv = nullptr;
	}
	m_state.cached_dsv = nullptr;

	g_perfmon.Put(GSPerfMon::RenderPasses, 1);

	const GSVector2i window_size = GetWindowSize();
	SetViewport(window_size);
	SetScissor(GSVector4i::loadh(window_size));

	return PresentResult::OK;
}
|
|
|
|
void GSDevice11::EndPresent()
|
|
{
|
|
RenderImGui();
|
|
|
|
// See note in BeginPresent() for why it's conditional on vsync-off.
|
|
if (m_vsync_mode != GSVSyncMode::FIFO && m_gpu_timing_enabled)
|
|
PopTimestampQuery();
|
|
|
|
const UINT sync_interval = static_cast<UINT>(m_vsync_mode == GSVSyncMode::FIFO);
|
|
const UINT flags = (m_vsync_mode == GSVSyncMode::Disabled && m_using_allow_tearing) ? DXGI_PRESENT_ALLOW_TEARING : 0;
|
|
m_swap_chain->Present(sync_interval, flags);
|
|
|
|
if (m_gpu_timing_enabled)
|
|
KickTimestampQuery();
|
|
|
|
// clear out the swap chain view, it might get resized..
|
|
OMSetRenderTargets(nullptr, nullptr, nullptr, nullptr);
|
|
}
|
|
|
|
bool GSDevice11::CreateTimestampQueries()
|
|
{
|
|
for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++)
|
|
{
|
|
for (u32 j = 0; j < 3; j++)
|
|
{
|
|
const CD3D11_QUERY_DESC qdesc((j == 0) ? D3D11_QUERY_TIMESTAMP_DISJOINT : D3D11_QUERY_TIMESTAMP);
|
|
const HRESULT hr = m_dev->CreateQuery(&qdesc, m_timestamp_queries[i][j].put());
|
|
if (FAILED(hr))
|
|
{
|
|
m_timestamp_queries = {};
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
KickTimestampQuery();
|
|
return true;
|
|
}
|
|
|
|
void GSDevice11::DestroyTimestampQueries()
|
|
{
|
|
if (!m_timestamp_queries[0][0])
|
|
return;
|
|
|
|
if (m_timestamp_query_started)
|
|
m_ctx->End(m_timestamp_queries[m_write_timestamp_query][1].get());
|
|
|
|
m_timestamp_queries = {};
|
|
m_read_timestamp_query = 0;
|
|
m_write_timestamp_query = 0;
|
|
m_waiting_timestamp_queries = 0;
|
|
m_timestamp_query_started = 0;
|
|
}
|
|
|
|
void GSDevice11::PopTimestampQuery()
|
|
{
|
|
while (m_waiting_timestamp_queries > 0)
|
|
{
|
|
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
|
const HRESULT disjoint_hr = m_ctx->GetData(m_timestamp_queries[m_read_timestamp_query][0].get(), &disjoint,
|
|
sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH);
|
|
if (disjoint_hr != S_OK)
|
|
break;
|
|
|
|
if (disjoint.Disjoint)
|
|
{
|
|
DevCon.WriteLn("D3D11: GPU timing disjoint, resetting.");
|
|
m_read_timestamp_query = 0;
|
|
m_write_timestamp_query = 0;
|
|
m_waiting_timestamp_queries = 0;
|
|
m_timestamp_query_started = 0;
|
|
}
|
|
else
|
|
{
|
|
u64 start = 0, end = 0;
|
|
const HRESULT start_hr = m_ctx->GetData(m_timestamp_queries[m_read_timestamp_query][1].get(), &start,
|
|
sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH);
|
|
const HRESULT end_hr = m_ctx->GetData(m_timestamp_queries[m_read_timestamp_query][2].get(), &end,
|
|
sizeof(end), D3D11_ASYNC_GETDATA_DONOTFLUSH);
|
|
if (start_hr == S_OK && end_hr == S_OK)
|
|
{
|
|
m_accumulated_gpu_time += static_cast<float>(
|
|
static_cast<double>(end - start) / (static_cast<double>(disjoint.Frequency) / 1000.0));
|
|
m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
|
|
m_waiting_timestamp_queries--;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m_timestamp_query_started)
|
|
{
|
|
m_ctx->End(m_timestamp_queries[m_write_timestamp_query][2].get());
|
|
m_ctx->End(m_timestamp_queries[m_write_timestamp_query][0].get());
|
|
m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
|
|
m_timestamp_query_started = false;
|
|
m_waiting_timestamp_queries++;
|
|
}
|
|
}
|
|
|
|
void GSDevice11::KickTimestampQuery()
|
|
{
|
|
if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES)
|
|
return;
|
|
|
|
m_ctx->Begin(m_timestamp_queries[m_write_timestamp_query][0].get());
|
|
m_ctx->End(m_timestamp_queries[m_write_timestamp_query][1].get());
|
|
m_timestamp_query_started = true;
|
|
}
|
|
|
|
bool GSDevice11::SetGPUTimingEnabled(bool enabled)
|
|
{
|
|
if (m_gpu_timing_enabled == enabled)
|
|
return true;
|
|
|
|
m_gpu_timing_enabled = enabled;
|
|
if (m_gpu_timing_enabled)
|
|
{
|
|
return CreateTimestampQueries();
|
|
}
|
|
else
|
|
{
|
|
DestroyTimestampQueries();
|
|
return true;
|
|
}
|
|
}
|
|
|
|
float GSDevice11::GetAndResetAccumulatedGPUTime()
|
|
{
|
|
const float value = m_accumulated_gpu_time;
|
|
m_accumulated_gpu_time = 0.0f;
|
|
return value;
|
|
}
|
|
|
|
void GSDevice11::DrawPrimitive()
|
|
{
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
|
PSUpdateShaderState(true, true);
|
|
m_ctx->Draw(m_vertex.count, m_vertex.start);
|
|
}
|
|
|
|
void GSDevice11::DrawIndexedPrimitive()
|
|
{
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
|
PSUpdateShaderState(true, true);
|
|
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
|
|
}
|
|
|
|
void GSDevice11::DrawIndexedPrimitive(int offset, int count)
|
|
{
|
|
pxAssert(offset + count <= (int)m_index.count);
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
|
PSUpdateShaderState(true, true);
|
|
m_ctx->DrawIndexed(count, m_index.start + offset, m_vertex.start);
|
|
}
|
|
|
|
// Applies a pending clear or invalidation to a render target / depth stencil texture and marks it
// Dirty. Textures that are neither, or that are already Dirty, are left untouched.
void GSDevice11::CommitClear(GSTexture* t)
{
	GSTexture11* const tex = static_cast<GSTexture11*>(t);
	if (!tex->IsRenderTargetOrDepthStencil())
		return;

	const GSTexture::State state = tex->GetState();
	if (state == GSTexture::State::Dirty)
		return;

	// Invalidated contents are discarded; anything else is cleared to the stored clear value.
	const bool invalidated = (state == GSTexture::State::Invalidated);
	if (tex->IsDepthStencil())
	{
		if (invalidated)
			m_ctx->DiscardView(static_cast<ID3D11DepthStencilView*>(*tex));
		else
			m_ctx->ClearDepthStencilView(*tex, D3D11_CLEAR_DEPTH, tex->GetClearDepth(), 0);
	}
	else
	{
		if (invalidated)
			m_ctx->DiscardView(static_cast<ID3D11RenderTargetView*>(*tex));
		else
			m_ctx->ClearRenderTargetView(*tex, tex->GetUNormClearColor().F32);
	}

	tex->SetState(GSTexture::State::Dirty);
}
|
|
|
|
void GSDevice11::PushDebugGroup(const char* fmt, ...)
|
|
{
|
|
if (!m_annotation)
|
|
return;
|
|
|
|
std::va_list ap;
|
|
va_start(ap, fmt);
|
|
std::string str(StringUtil::StdStringFromFormatV(fmt, ap));
|
|
va_end(ap);
|
|
|
|
m_annotation->BeginEvent(StringUtil::UTF8StringToWideString(str).c_str());
|
|
}
|
|
|
|
void GSDevice11::PopDebugGroup()
|
|
{
|
|
if (!m_annotation)
|
|
return;
|
|
|
|
m_annotation->EndEvent();
|
|
}
|
|
|
|
// Inserts a single debug marker on the annotation interface, if one is available.
// category: not used by this implementation.
// fmt/...:  printf-style format string and arguments for the marker text (UTF-8).
void GSDevice11::InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...)
{
	if (!m_annotation)
		return;

	std::va_list args;
	va_start(args, fmt);
	const std::string message(StringUtil::StdStringFromFormatV(fmt, args));
	va_end(args);

	// The annotation interface is given a wide string, so convert from UTF-8 first.
	const auto wide_message = StringUtil::UTF8StringToWideString(message);
	m_annotation->SetMarker(wide_message.c_str());
}
|
|
|
|
// Allocates a single-sample, default-usage 2D texture and wraps it in a GSTexture11.
// The bind flags depend on the requested type; mipmapped, non-compressed Texture surfaces are
// additionally made render-target bindable with GENERATE_MIPS set.
// Returns nullptr (after logging an error) if the D3D11 allocation fails.
GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
	D3D11_TEXTURE2D_DESC desc = {};
	desc.Width = width;
	desc.Height = height;
	desc.MipLevels = levels;
	desc.ArraySize = 1;
	desc.Format = GSTexture11::GetDXGIFormat(format);
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;
	desc.Usage = D3D11_USAGE_DEFAULT;

	switch (type)
	{
		case GSTexture::Type::RenderTarget:
			desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
			break;

		case GSTexture::Type::DepthStencil:
			desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE;
			break;

		case GSTexture::Type::Texture:
		{
			const bool generate_mips = (levels > 1) && !GSTexture::IsCompressedFormat(format);
			if (generate_mips)
			{
				desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
				desc.MiscFlags = D3D11_RESOURCE_MISC_GENERATE_MIPS;
			}
			else
			{
				desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
				desc.MiscFlags = 0;
			}
			break;
		}

		case GSTexture::Type::RWTexture:
			desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
			break;

		default:
			break;
	}

	wil::com_ptr_nothrow<ID3D11Texture2D> resource;
	const HRESULT hr = m_dev->CreateTexture2D(&desc, nullptr, resource.put());
	if (FAILED(hr))
	{
		Console.Error("D3D11: Failed to allocate %dx%d surface", width, height);
		return nullptr;
	}

	return new GSTexture11(std::move(resource), desc, type, format);
}
|
|
|
|
// Creates a download texture of the given size and format by forwarding to GSDownloadTexture11::Create().
std::unique_ptr<GSDownloadTexture> GSDevice11::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
	std::unique_ptr<GSDownloadTexture> download_texture = GSDownloadTexture11::Create(width, height, format);
	return download_texture;
}
|
|
|
|
// Copies rectangle r of sTex into dTex at (destX, destY).
// Empty rectangles are ignored, and depth copies are only performed when both textures have the
// same size. A pending clear on the source is either carried over to the destination or committed
// before copying. The destination ends up Dirty after a real copy.
void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)
{
	// Empty rect, abort copy.
	if (r.rempty())
	{
		GL_INS("D3D11: CopyRect rect empty.");
		return;
	}

	const GSVector4i src_bounds(0, 0, sTex->GetWidth(), sTex->GetHeight());
	const GSVector4i dst_bounds(0, 0, dTex->GetWidth(), dTex->GetHeight());
	const bool same_size = src_bounds.eq(dst_bounds);
	const bool depth_copy = sTex->IsDepthStencil();

	// Sizes must match for full depth copies when no partial copies are supported.
	if (depth_copy && !same_size)
	{
		GL_INS("D3D11: CopyRect rect mismatch for full depth copy.");
		return;
	}

	// True when the whole destination gets overwritten.
	const bool covers_destination = depth_copy || dst_bounds.eq(r);

	// Source is cleared, if destination is a render target, we can carry the clear forward.
	if (sTex->GetState() == GSTexture::State::Cleared)
	{
		if (dTex->IsRenderTargetOrDepthStencil() && ProcessClearsBeforeCopy(sTex, dTex, covers_destination))
			return;

		// Commit clear for the source texture.
		CommitClear(sTex);
	}

	g_perfmon.Put(GSPerfMon::TextureCopies, 1);

	// Commit destination clear if partially overwritten (color only).
	if (!covers_destination && dTex->GetState() == GSTexture::State::Cleared)
		CommitClear(dTex);

	// DX11 doesn't support partial depth copy so we need to
	// either pass a nullptr D3D11_BOX for a full depth copy or use CopyResource instead.
	// Optimization: Use CopyResource for depth copies or full rect color copies, it's faster than CopySubresourceRegion.
	const bool whole_resource = same_size && (depth_copy || (destX == 0 && destY == 0 && r.eq(src_bounds)));

	GSTexture11* const dst11 = static_cast<GSTexture11*>(dTex);
	GSTexture11* const src11 = static_cast<GSTexture11*>(sTex);
	if (whole_resource)
	{
		m_ctx->CopyResource(*dst11, *src11);
	}
	else
	{
		const D3D11_BOX src_box = {static_cast<UINT>(r.left), static_cast<UINT>(r.top), 0U,
			static_cast<UINT>(r.right), static_cast<UINT>(r.bottom), 1U};
		m_ctx->CopySubresourceRegion(*dst11, 0, destX, destY, 0, *src11, 0, &src_box);
	}

	dTex->SetState(GSTexture::State::Dirty);
}
|
|
|
|
// Stretch-blit using one of the built-in convert shaders, no pixel shader constant buffer, and the
// blend state that matches the given colour write mask.
void GSDevice11::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
	GSHWDrawConfig::ColorMaskSelector cms, ShaderConvert shader, bool linear)
{
	ID3D11PixelShader* const convert_ps = m_convert.ps[static_cast<int>(shader)].get();
	ID3D11BlendState* const masked_bs = m_convert.bs[cms.wrgba].get();
	DoStretchRect(sTex, sRect, dTex, dRect, convert_ps, nullptr, masked_bs, linear);
}
|
|
|
|
// Stretch-blit with a caller-supplied pixel shader and constant buffer, using the convert blend
// state that enables writes to all colour channels.
void GSDevice11::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear)
{
	ID3D11BlendState* const write_all_bs = m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get();
	DoStretchRect(sTex, sRect, dTex, dRect, ps, ps_cb, write_all_bs, linear);
}
|
|
|
|
// Core stretch-blit: draws sRect of sTex into dRect of dTex as a four-vertex triangle strip.
// dTex may be a colour or depth target. When dTex is null the currently bound target is used and
// dRect is interpreted relative to the window surface size.
// ps/ps_cb: pixel shader and its constant buffer; bs: blend state; linear: linear vs point sampling.
void GSDevice11::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear)
{
	CommitClear(sTex);

	const bool depth_target = dTex && dTex->IsDepthStencil();

	GSVector2i target_size;
	if (!dTex)
	{
		target_size = GSVector2i(m_window_info.surface_width, m_window_info.surface_height);
	}
	else
	{
		// ps unbind conflicting srvs
		PSUnbindConflictingSRVs(dTex);

		target_size = dTex->GetSize();
		if (depth_target)
			OMSetRenderTargets(nullptr, dTex);
		else
			OMSetRenderTargets(dTex, nullptr);
	}

	// om
	OMSetDepthStencilState(depth_target ? m_convert.dss_write.get() : m_convert.dss.get(), 0);
	OMSetBlendState(bs, 0);

	// ia: map the destination rectangle from pixels to clip space (Y flipped).
	const float left = dRect.x * 2 / target_size.x - 1.0f;
	const float top = 1.0f - dRect.y * 2 / target_size.y;
	const float right = dRect.z * 2 / target_size.x - 1.0f;
	const float bottom = 1.0f - dRect.w * 2 / target_size.y;

	GSVertexPT1 quad[] =
	{
		{GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)},
		{GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)},
		{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)},
		{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)},
	};

	IASetVertexBuffer(quad, sizeof(quad[0]), std::size(quad));
	IASetInputLayout(m_convert.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// vs
	VSSetShader(m_convert.vs.get(), nullptr);

	// ps
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
	PSSetShader(ps, ps_cb);

	// draw
	DrawPrimitive();
}
|
|
|
|
// Draws sRect of sTex into dRect of dTex with one of the present shaders. When dTex is null the
// currently bound target is used and dRect is interpreted relative to the window surface size.
// shaderTime is forwarded to the shader through the display constant buffer.
void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear)
{
	CommitClear(sTex);

	GSVector2i target_size;
	if (!dTex)
	{
		target_size = GSVector2i(m_window_info.surface_width, m_window_info.surface_height);
	}
	else
	{
		// ps unbind conflicting srvs
		PSUnbindConflictingSRVs(dTex);

		target_size = dTex->GetSize();
		OMSetRenderTargets(dTex, nullptr);
	}

	// Upload the source/target rectangles and the time value for the present shader.
	DisplayConstantBuffer display_cb;
	display_cb.SetSource(sRect, sTex->GetSize());
	display_cb.SetTarget(dRect, target_size);
	display_cb.SetTime(shaderTime);
	m_ctx->UpdateSubresource(m_present.ps_cb.get(), 0, nullptr, &display_cb, 0, 0);

	// om
	OMSetDepthStencilState(m_convert.dss.get(), 0);
	OMSetBlendState(m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get(), 0);

	// ia: map the destination rectangle from pixels to clip space (Y flipped).
	const float left = dRect.x * 2 / target_size.x - 1.0f;
	const float top = 1.0f - dRect.y * 2 / target_size.y;
	const float right = dRect.z * 2 / target_size.x - 1.0f;
	const float bottom = 1.0f - dRect.w * 2 / target_size.y;

	GSVertexPT1 quad[] =
	{
		{GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)},
		{GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)},
		{GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)},
		{GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)},
	};

	IASetVertexBuffer(quad, sizeof(quad[0]), std::size(quad));
	IASetInputLayout(m_present.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// vs
	VSSetShader(m_present.vs.get(), nullptr);

	// ps
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
	PSSetShader(m_present.ps[static_cast<u32>(shader)].get(), m_present.ps_cb.get());

	// draw
	DrawPrimitive();
}
|
|
|
|
// Renders a palette strip of dSize x 1 pixels into dTex from sTex, using the CLUT_4 shader when
// dSize is 16 and the CLUT_8 shader otherwise. Parameters reach the shader via the merge constant buffer.
void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// match merge cb
	struct CLUTConstants
	{
		float scale;
		float pad1[3];
		u32 offsetX, offsetY, dOffset;
	};

	const CLUTConstants constants = {sScale, {}, offsetX, offsetY, dOffset};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &constants, 0, 0);

	const ShaderConvert shader = (dSize == 16) ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;
	ID3D11PixelShader* const clut_ps = m_convert.ps[static_cast<int>(shader)].get();

	const GSVector4 dRect(0, 0, dSize, 1);
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, clut_ps, m_merge.cb.get(), nullptr, false);
}
|
|
|
|
// Converts sTex into an 8-bit indexed texture covering all of dTex. The RGBA_TO_8I shader is used
// when (SPSM & 0xE) == 0, otherwise RGB5A1_TO_8I. Parameters reach the shader via the merge constant
// buffer. offsetX, offsetY and DPSM are not used by this implementation.
void GSDevice11::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM)
{
	// match merge cb
	struct IndexedConstants
	{
		float scale;
		float pad1[3];
		u32 SBW, DBW, SPSM;
	};

	const IndexedConstants constants = {sScale, {}, SBW, DBW, SPSM};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &constants, 0, 0);

	const bool rgba_source = ((SPSM & 0xE) == 0);
	const ShaderConvert shader = rgba_source ? ShaderConvert::RGBA_TO_8I : ShaderConvert::RGB5A1_TO_8I;
	ID3D11PixelShader* const convert_ps = m_convert.ps[static_cast<int>(shader)].get();

	const GSVector4 dRect(0, 0, dTex->GetWidth(), dTex->GetHeight());
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, convert_ps, m_merge.cb.get(), nullptr, false);
}
|
|
|
|
// Downsamples sTex into dRect of dTex with the DOWNSAMPLE_COPY shader.
// The constant buffer carries weight = downsample_factor^2, a step multiplier (2 when the native
// scaling hack is set above Aggressive, otherwise 1), the clamp minimum and the factor itself.
void GSDevice11::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min, const GSVector4& dRect)
{
	struct DownsampleConstants
	{
		float weight;
		float step_multiplier;
		float pad0[2];
		GSVector2i clamp_min;
		int downsample_factor;
		int pad1;
	};

	const float weight = static_cast<float>(downsample_factor * downsample_factor);
	const float step_multiplier = (GSConfig.UserHacks_NativeScaling > GSNativeScaling::Aggressive) ? 2.0f : 1.0f;
	const DownsampleConstants constants = {
		weight, step_multiplier, {}, clamp_min, static_cast<int>(downsample_factor), 0};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &constants, 0, 0);

	ID3D11PixelShader* const downsample_ps = m_convert.ps[static_cast<int>(ShaderConvert::DOWNSAMPLE_COPY)].get();
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, downsample_ps, m_merge.cb.get(), nullptr, false);
}
|
|
|
|
void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
|
|
{
|
|
IASetInputLayout(m_convert.il.get());
|
|
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
|
|
|
VSSetShader(m_convert.vs.get(), nullptr);
|
|
PSSetShader(m_convert.ps[static_cast<int>(shader)].get(), nullptr);
|
|
PSUnbindConflictingSRVs(dTex);
|
|
|
|
OMSetDepthStencilState(dTex->IsRenderTarget() ? m_convert.dss.get() : m_convert.dss_write.get(), 0);
|
|
OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr);
|
|
|
|
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
|
GSTexture* last_tex = rects[0].src;
|
|
bool last_linear = rects[0].linear;
|
|
u8 last_wmask = rects[0].wmask.wrgba;
|
|
|
|
u32 first = 0;
|
|
u32 count = 1;
|
|
|
|
for (u32 i = 1; i < num_rects; i++)
|
|
{
|
|
if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
|
|
{
|
|
count++;
|
|
continue;
|
|
}
|
|
|
|
DoMultiStretchRects(rects + first, count, ds);
|
|
last_tex = rects[i].src;
|
|
last_linear = rects[i].linear;
|
|
last_wmask = rects[i].wmask.wrgba;
|
|
first += count;
|
|
count = 1;
|
|
}
|
|
|
|
DoMultiStretchRects(rects + first, count, ds);
|
|
}
|
|
|
|
void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds)
|
|
{
|
|
// Don't use primitive restart here, it ends up slower on some drivers.
|
|
const u32 vertex_reserve_size = num_rects * 4;
|
|
const u32 index_reserve_size = num_rects * 6;
|
|
GSVertexPT1* verts = static_cast<GSVertexPT1*>(IAMapVertexBuffer(sizeof(GSVertexPT1), vertex_reserve_size));
|
|
u16* idx = IAMapIndexBuffer(index_reserve_size);
|
|
u32 icount = 0;
|
|
u32 vcount = 0;
|
|
for (u32 i = 0; i < num_rects; i++)
|
|
{
|
|
const GSVector4& sRect = rects[i].src_rect;
|
|
const GSVector4& dRect = rects[i].dst_rect;
|
|
const float left = dRect.x * 2 / ds.x - 1.0f;
|
|
const float top = 1.0f - dRect.y * 2 / ds.y;
|
|
const float right = dRect.z * 2 / ds.x - 1.0f;
|
|
const float bottom = 1.0f - dRect.w * 2 / ds.y;
|
|
|
|
const u32 vstart = vcount;
|
|
verts[vcount++] = {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)};
|
|
verts[vcount++] = {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)};
|
|
verts[vcount++] = {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)};
|
|
verts[vcount++] = {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)};
|
|
|
|
if (i > 0)
|
|
idx[icount++] = vstart;
|
|
|
|
idx[icount++] = vstart;
|
|
idx[icount++] = vstart + 1;
|
|
idx[icount++] = vstart + 2;
|
|
idx[icount++] = vstart + 3;
|
|
idx[icount++] = vstart + 3;
|
|
};
|
|
|
|
IAUnmapVertexBuffer(sizeof(GSVertexPT1), vcount);
|
|
IAUnmapIndexBuffer(icount);
|
|
IASetIndexBuffer(m_ib.get());
|
|
|
|
CommitClear(rects[0].src);
|
|
PSSetShaderResource(0, rects[0].src);
|
|
PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get());
|
|
|
|
OMSetBlendState(m_convert.bs[rects[0].wmask.wrgba].get(), 0.0f);
|
|
|
|
DrawIndexedPrimitive();
|
|
}
|
|
|
|
|
|
// Merges the two source outputs (sTex[0], sTex[1]) into dTex over background colour c, and
// optionally writes the feedback output to sTex[2].
// sRect/dRect are indexed like sTex (dRect[2] is the feedback destination rectangle).
void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c, const bool linear)
{
	const GSVector4 unit_rect(0.0f, 0.0f, 1.0f, 1.0f);
	const bool has_feedback_target = (sTex[2] != nullptr);
	const bool feedback_from_output2 = PMODE.EN2 && has_feedback_target && EXTBUF.FBIN == 1;
	const bool feedback_from_output1 = PMODE.EN1 && has_feedback_target && EXTBUF.FBIN == 0;
	const bool feedback2_with_bg_blend = feedback_from_output2 && PMODE.SLBG == 1;

	ID3D11PixelShader* const yuv_ps = m_convert.ps[static_cast<int>(ShaderConvert::YUV)].get();

	// Merge the 2 source textures (sTex[0],sTex[1]). Final results go to dTex. Feedback write will go to sTex[2].
	// If either 2nd output is disabled or SLBG is 1, a background color will be used.
	// Note: background color is also used when outside of the unit rectangle area
	ClearRenderTarget(dTex, c);

	// Upload constant to select YUV algo, but skip constant buffer update if we don't need it
	if (feedback_from_output2 || feedback_from_output1 || sTex[0])
	{
		const MergeConstantBuffer cb = {GSVector4::unorm8(c), EXTBUF.EMODA, EXTBUF.EMODC};
		m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &cb, 0, 0);
	}

	if (sTex[1] && (PMODE.SLBG == 0 || feedback2_with_bg_blend))
	{
		// 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output
		// Note: value outside of dRect must contains the background color (c)
		StretchRect(sTex[1], sRect[1], dTex, PMODE.SLBG ? dRect[2] : dRect[1], ShaderConvert::COPY, linear);
	}

	// Save 2nd output
	if (feedback_from_output2)
		DoStretchRect(dTex, unit_rect, sTex[2], dRect[2], yuv_ps, m_merge.cb.get(), nullptr, linear);

	// Restore background color to process the normal merge
	if (feedback2_with_bg_blend)
		ClearRenderTarget(dTex, c);

	// 1st output is enabled. It must be blended
	if (sTex[0])
		DoStretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge.ps[PMODE.MMOD].get(), m_merge.cb.get(), m_merge.bs.get(), linear);

	// Feedback taken after the full merge.
	if (feedback_from_output1)
		DoStretchRect(dTex, unit_rect, sTex[2], dRect[2], yuv_ps, m_merge.cb.get(), nullptr, linear);
}
|
|
|
|
// Runs the selected interlace shader from sRect of sTex into dRect of dTex, after uploading cb to
// the interlace constant buffer.
void GSDevice11::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb)
{
	ID3D11Buffer* const interlace_cb = m_interlace.cb.get();
	m_ctx->UpdateSubresource(interlace_cb, 0, nullptr, &cb, 0, 0);

	ID3D11PixelShader* const interlace_ps = m_interlace.ps[static_cast<int>(shader)].get();
	DoStretchRect(sTex, sRect, dTex, dRect, interlace_ps, interlace_cb, linear);
}
|
|
|
|
// Applies FXAA from sTex onto the whole of dTex with linear sampling.
// The pixel shader is compiled on first use; if its source is missing or compilation fails,
// nothing is drawn.
void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{
	if (!m_fxaa_ps)
	{
		const std::optional<std::string> source = ReadShaderSource("shaders/common/fxaa.fx");
		if (!source.has_value())
		{
			Console.Error("D3D11: FXAA shader is missing");
			return;
		}

		ShaderMacro macros;
		macros.AddMacro("FXAA_HLSL", "1");
		m_fxaa_ps = m_shader_cache.GetPixelShader(m_dev.get(), *source, macros.GetPtr(), "main");
		if (!m_fxaa_ps)
			return;
	}

	// Full source (normalised coordinates) onto the full destination (pixels).
	const GSVector2i dst_size = dTex->GetSize();
	const GSVector4 sRect(0, 0, 1, 1);
	const GSVector4 dRect(0, 0, dst_size.x, dst_size.y);

	DoStretchRect(sTex, sRect, dTex, dRect, m_fxaa_ps.get(), nullptr, true);
}
|
|
|
|
// Applies the shade boost shader from sTex onto the whole of dTex with point sampling.
// params: four floats uploaded as-is to the shade boost constant buffer.
void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4])
{
	ID3D11Buffer* const boost_cb = m_shadeboost.cb.get();
	m_ctx->UpdateSubresource(boost_cb, 0, nullptr, params, 0, 0);

	// Full source (normalised coordinates) onto the full destination (pixels).
	const GSVector2i dst_size = dTex->GetSize();
	const GSVector4 sRect(0, 0, 1, 1);
	const GSVector4 dRect(0, 0, dst_size.x, dst_size.y);

	DoStretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps.get(), boost_cb, false);
}
|
|
|
|
// Binds the vertex shader (and input layout) for the given selector, compiling and caching it on
// first use, and uploads cb to the vertex shader constant buffer when its contents changed.
// Expanding selectors use the "vs_main_expand" entry point and get no input layout.
void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb)
{
	auto entry = std::as_const(m_vs).find(sel.key);
	if (entry == m_vs.end())
	{
		ShaderMacro macros;
		macros.AddMacro("VERTEX_SHADER", 1);
		macros.AddMacro("VS_TME", sel.tme);
		macros.AddMacro("VS_FST", sel.fst);
		macros.AddMacro("VS_IIP", sel.iip);
		macros.AddMacro("VS_EXPAND", static_cast<int>(sel.expand));

		static constexpr const D3D11_INPUT_ELEMENT_DESC layout[] =
		{
			{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
			{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
		};

		GSVertexShader11 compiled;
		if (sel.expand != GSHWDrawConfig::VSExpand::None)
		{
			compiled.vs = m_shader_cache.GetVertexShader(m_dev.get(), m_tfx_source, macros.GetPtr(), "vs_main_expand");
		}
		else
		{
			m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), compiled.vs.put(), compiled.il.put(), layout,
				std::size(layout), m_tfx_source, macros.GetPtr(), "vs_main");
		}

		entry = m_vs.try_emplace(sel.key, std::move(compiled)).first;
	}

	// Only touch the GPU buffer when the cached copy reports a change.
	if (m_vs_cb_cache.Update(*cb))
		m_ctx->UpdateSubresource(m_vs_cb.get(), 0, nullptr, cb, 0, 0);

	VSSetShader(entry->second.vs.get(), m_vs_cb.get());
	IASetInputLayout(entry->second.il.get());
}
|
|
|
|
// Binds the pixel shader and sampler for a draw.
// sel:  pixel shader selector; the shader is compiled from the TFX source and cached on first use.
// cb:   optional constants; uploaded to the pixel shader constant buffer only when non-null and changed.
// ssel: sampler selector; the sampler state is created and cached on first use. No sampler is
//       looked up when sel.tfx == 4, in which case a null sampler is bound.
void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
{
	auto shader_it = std::as_const(m_ps).find(sel);
	if (shader_it == m_ps.end())
	{
		ShaderMacro sm;

		sm.AddMacro("PIXEL_SHADER", 1);
		sm.AddMacro("PS_FST", sel.fst);
		sm.AddMacro("PS_WMS", sel.wms);
		sm.AddMacro("PS_WMT", sel.wmt);
		sm.AddMacro("PS_ADJS", sel.adjs);
		sm.AddMacro("PS_ADJT", sel.adjt);
		sm.AddMacro("PS_AEM_FMT", sel.aem_fmt);
		sm.AddMacro("PS_AEM", sel.aem);
		sm.AddMacro("PS_TFX", sel.tfx);
		sm.AddMacro("PS_TCC", sel.tcc);
		sm.AddMacro("PS_DATE", sel.date);
		sm.AddMacro("PS_ATST", sel.atst);
		sm.AddMacro("PS_AFAIL", sel.afail);
		sm.AddMacro("PS_FOG", sel.fog);
		sm.AddMacro("PS_IIP", sel.iip);
		sm.AddMacro("PS_BLEND_HW", sel.blend_hw);
		sm.AddMacro("PS_A_MASKED", sel.a_masked);
		sm.AddMacro("PS_FBA", sel.fba);
		sm.AddMacro("PS_FBMASK", sel.fbmask);
		sm.AddMacro("PS_LTF", sel.ltf);
		sm.AddMacro("PS_TCOFFSETHACK", sel.tcoffsethack);
		sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler);
		sm.AddMacro("PS_REGION_RECT", sel.region_rect);
		sm.AddMacro("PS_SHUFFLE", sel.shuffle);
		sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
		sm.AddMacro("PS_PROCESS_BA", sel.process_ba);
		sm.AddMacro("PS_PROCESS_RG", sel.process_rg);
		sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
		sm.AddMacro("PS_READ16_SRC", sel.real16src);
		sm.AddMacro("PS_WRITE_RG", sel.write_rg);
		sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
		sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
		sm.AddMacro("PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);
		sm.AddMacro("PS_DST_FMT", sel.dst_fmt);
		sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
		sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
		sm.AddMacro("PS_COLCLIP_HW", sel.colclip_hw);
		sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
		sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
		sm.AddMacro("PS_COLCLIP", sel.colclip);
		sm.AddMacro("PS_BLEND_A", sel.blend_a);
		sm.AddMacro("PS_BLEND_B", sel.blend_b);
		sm.AddMacro("PS_BLEND_C", sel.blend_c);
		sm.AddMacro("PS_BLEND_D", sel.blend_d);
		sm.AddMacro("PS_BLEND_MIX", sel.blend_mix);
		sm.AddMacro("PS_ROUND_INV", sel.round_inv);
		sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a);
		sm.AddMacro("PS_PABE", sel.pabe);
		sm.AddMacro("PS_DITHER", sel.dither);
		sm.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust);
		sm.AddMacro("PS_ZCLAMP", sel.zclamp);
		sm.AddMacro("PS_ZFLOOR", sel.zfloor);
		sm.AddMacro("PS_SCANMSK", sel.scanmsk);
		sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod);
		sm.AddMacro("PS_MANUAL_LOD", sel.manual_lod);
		sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
		sm.AddMacro("PS_NO_COLOR", sel.no_color);
		sm.AddMacro("PS_NO_COLOR1", sel.no_color1);

		wil::com_ptr_nothrow<ID3D11PixelShader> compiled_ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
		shader_it = m_ps.try_emplace(sel, std::move(compiled_ps)).first;
	}

	// Only touch the GPU buffer when constants were supplied and the cached copy reports a change.
	if (cb && m_ps_cb_cache.Update(*cb))
		m_ctx->UpdateSubresource(m_ps_cb.get(), 0, nullptr, cb, 0, 0);

	wil::com_ptr_nothrow<ID3D11SamplerState> sampler;

	if (sel.tfx != 4)
	{
		// Bilinear sampling is not expected together with palette formats or wrap modes >= 3.
		if (sel.pal_fmt || sel.wms >= 3 || sel.wmt >= 3)
		{
			pxAssert(ssel.biln == 0);
		}

		const auto sampler_it = std::as_const(m_ps_ss).find(ssel.key);
		if (sampler_it != m_ps_ss.end())
		{
			sampler = sampler_it->second;
		}
		else
		{
			D3D11_SAMPLER_DESC sd = {};

			const int anisotropy = GSConfig.MaxAnisotropy;
			if (anisotropy > 1 && ssel.aniso)
			{
				sd.Filter = D3D11_FILTER_ANISOTROPIC;
			}
			else
			{
				// Indexed by (mip_linear << 2) | (mag_linear << 1) | min_linear.
				static constexpr std::array<D3D11_FILTER, 8> filters = {{
					D3D11_FILTER_MIN_MAG_MIP_POINT, // 000 / min=point,mag=point,mip=point
					D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT, // 001 / min=linear,mag=point,mip=point
					D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT, // 010 / min=point,mag=linear,mip=point
					D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT, // 011 / min=linear,mag=linear,mip=point
					D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR, // 100 / min=point,mag=point,mip=linear
					D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, // 101 / min=linear,mag=point,mip=linear
					D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR, // 110 / min=point,mag=linear,mip=linear
					D3D11_FILTER_MIN_MAG_MIP_LINEAR, // 111 / min=linear,mag=linear,mip=linear
				}};

				const u8 filter_index = (static_cast<u8>(ssel.IsMipFilterLinear()) << 2) |
										(static_cast<u8>(ssel.IsMagFilterLinear()) << 1) |
										static_cast<u8>(ssel.IsMinFilterLinear());
				sd.Filter = filters[filter_index];
			}

			sd.AddressU = ssel.tau ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP;
			sd.AddressV = ssel.tav ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP;
			sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
			sd.MinLOD = 0.0f;
			sd.MaxLOD = (ssel.lodclamp || !ssel.UseMipmapFiltering()) ? 0.25f : FLT_MAX;
			sd.MaxAnisotropy = std::clamp(anisotropy, 1, 16);
			sd.ComparisonFunc = D3D11_COMPARISON_NEVER;

			m_dev->CreateSamplerState(&sd, &sampler);

			m_ps_ss[ssel.key] = sampler;
		}
	}

	PSSetSamplerState(sampler.get());

	PSSetShader(shader_it->second.get(), m_ps_cb.get());
}
|
|
|
|
// Selects the output-merger state for a draw: looks up (or lazily creates and caches)
// the depth-stencil state for `dssel` and the blend state for `bsel`, then binds both.
// The depth-stencil state is bound with stencil reference 1; `afix` is passed through
// as the blend factor.
void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix)
{
	auto dss_it = std::as_const(m_om_dss).find(dssel.key);
	if (dss_it == m_om_dss.end())
	{
		D3D11_DEPTH_STENCIL_DESC desc;
		memset(&desc, 0, sizeof(desc));

		if (dssel.date)
		{
			// Front and back faces use the same stencil operations.
			D3D11_DEPTH_STENCILOP_DESC face;
			face.StencilFunc = D3D11_COMPARISON_EQUAL;
			face.StencilPassOp = dssel.date_one ? D3D11_STENCIL_OP_ZERO : D3D11_STENCIL_OP_KEEP;
			face.StencilFailOp = D3D11_STENCIL_OP_KEEP;
			face.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP;

			desc.StencilEnable = true;
			desc.StencilReadMask = 1;
			desc.StencilWriteMask = 1;
			desc.FrontFace = face;
			desc.BackFace = face;
		}

		// Depth is only enabled when the test can reject something or depth is written.
		if (dssel.ztst != ZTST_ALWAYS || dssel.zwe)
		{
			static constexpr std::array<D3D11_COMPARISON_FUNC, 4> s_depth_funcs = {{
				D3D11_COMPARISON_NEVER,
				D3D11_COMPARISON_ALWAYS,
				D3D11_COMPARISON_GREATER_EQUAL,
				D3D11_COMPARISON_GREATER,
			}};

			desc.DepthEnable = true;
			desc.DepthWriteMask = dssel.zwe ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
			desc.DepthFunc = s_depth_funcs[dssel.ztst];
		}

		wil::com_ptr_nothrow<ID3D11DepthStencilState> new_dss;
		m_dev->CreateDepthStencilState(&desc, new_dss.put());

		dss_it = m_om_dss.try_emplace(dssel.key, std::move(new_dss)).first;
	}

	OMSetDepthStencilState(dss_it->second.get(), 1);

	auto bs_it = std::as_const(m_om_bs).find(bsel.key);
	if (bs_it == m_om_bs.end())
	{
		D3D11_BLEND_DESC desc;
		memset(&desc, 0, sizeof(desc));

		D3D11_RENDER_TARGET_BLEND_DESC& rt0 = desc.RenderTarget[0];

		if (bsel.blend.IsEffective(bsel.colormask))
		{
			// clang-format off
			static constexpr std::array<D3D11_BLEND, 16> s_d3d11_blend_factors = { {
				D3D11_BLEND_SRC_COLOR, D3D11_BLEND_INV_SRC_COLOR, D3D11_BLEND_DEST_COLOR, D3D11_BLEND_INV_DEST_COLOR,
				D3D11_BLEND_SRC1_COLOR, D3D11_BLEND_INV_SRC1_COLOR, D3D11_BLEND_SRC_ALPHA, D3D11_BLEND_INV_SRC_ALPHA,
				D3D11_BLEND_DEST_ALPHA, D3D11_BLEND_INV_DEST_ALPHA, D3D11_BLEND_SRC1_ALPHA, D3D11_BLEND_INV_SRC1_ALPHA,
				D3D11_BLEND_BLEND_FACTOR, D3D11_BLEND_INV_BLEND_FACTOR, D3D11_BLEND_ONE, D3D11_BLEND_ZERO
			} };
			static constexpr std::array<D3D11_BLEND_OP, 4> s_d3d11_blend_ops = { {
				D3D11_BLEND_OP_ADD, D3D11_BLEND_OP_SUBTRACT, D3D11_BLEND_OP_REV_SUBTRACT, D3D11_BLEND_OP_MIN
			} };
			// clang-format on

			rt0.BlendEnable = bsel.blend.enable;
			rt0.BlendOp = s_d3d11_blend_ops[bsel.blend.op];
			rt0.SrcBlend = s_d3d11_blend_factors[bsel.blend.src_factor];
			rt0.DestBlend = s_d3d11_blend_factors[bsel.blend.dst_factor];
			rt0.BlendOpAlpha = D3D11_BLEND_OP_ADD;
			rt0.SrcBlendAlpha = s_d3d11_blend_factors[bsel.blend.src_factor_alpha];
			rt0.DestBlendAlpha = s_d3d11_blend_factors[bsel.blend.dst_factor_alpha];
		}

		// One write-enable bit per colour channel selected in the mask.
		const u32 write_mask =
			(bsel.colormask.wr ? static_cast<u32>(D3D11_COLOR_WRITE_ENABLE_RED) : 0u) |
			(bsel.colormask.wg ? static_cast<u32>(D3D11_COLOR_WRITE_ENABLE_GREEN) : 0u) |
			(bsel.colormask.wb ? static_cast<u32>(D3D11_COLOR_WRITE_ENABLE_BLUE) : 0u) |
			(bsel.colormask.wa ? static_cast<u32>(D3D11_COLOR_WRITE_ENABLE_ALPHA) : 0u);
		rt0.RenderTargetWriteMask = static_cast<UINT8>(write_mask);

		wil::com_ptr_nothrow<ID3D11BlendState> new_bs;
		m_dev->CreateBlendState(&desc, new_bs.put());

		bs_it = m_om_bs.try_emplace(bsel.key, std::move(new_bs)).first;
	}

	OMSetBlendState(bs_it->second.get(), afix);
}
|
|
|
|
bool GSDevice11::CreateCASShaders()
|
|
{
|
|
CD3D11_BUFFER_DESC desc(NUM_CAS_CONSTANTS * sizeof(u32), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
|
|
HRESULT hr = m_dev->CreateBuffer(&desc, nullptr, m_cas.cb.put());
|
|
if (FAILED(hr))
|
|
return false;
|
|
|
|
std::optional<std::string> cas_source = ReadShaderSource("shaders/dx11/cas.hlsl");
|
|
if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value()))
|
|
return false;
|
|
|
|
static constexpr D3D_SHADER_MACRO sharpen_only_macros[] = {
|
|
{"CAS_SHARPEN_ONLY", "1"},
|
|
{nullptr, nullptr}};
|
|
|
|
m_cas.cs_sharpen = m_shader_cache.GetComputeShader(m_dev.get(), cas_source.value(), sharpen_only_macros, "main");
|
|
m_cas.cs_upscale = m_shader_cache.GetComputeShader(m_dev.get(), cas_source.value(), nullptr, "main");
|
|
if (!m_cas.cs_sharpen || !m_cas.cs_upscale)
|
|
{
|
|
Console.Error("D3D11: Failed to create CAS compute shaders.");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Runs the CAS compute shader reading from sTex and writing to dTex.
// `sharpen_only` picks the sharpen shader instead of the upscale one; `constants`
// is uploaded to the CAS constant buffer. Always returns true.
bool GSDevice11::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
{
	// Each thread group covers a 16x16 region of the destination; round up.
	constexpr int group_dim = 16;
	const int groups_x = (dTex->GetWidth() + (group_dim - 1)) / group_dim;
	const int groups_y = (dTex->GetHeight() + (group_dim - 1)) / group_dim;

	ID3D11ShaderResourceView* srvs[1] = {*static_cast<GSTexture11*>(sTex)};
	ID3D11UnorderedAccessView* uavs[1] = {*static_cast<GSTexture11*>(dTex)};
	ID3D11ComputeShader* const shader = sharpen_only ? m_cas.cs_sharpen.get() : m_cas.cs_upscale.get();

	m_ctx->OMSetRenderTargets(0, nullptr, nullptr);
	m_ctx->UpdateSubresource(m_cas.cb.get(), 0, nullptr, constants.data(), 0, 0);
	m_ctx->CSSetConstantBuffers(0, 1, m_cas.cb.addressof());
	m_ctx->CSSetShader(shader, nullptr, 0);
	m_ctx->CSSetShaderResources(0, std::size(srvs), srvs);
	m_ctx->CSSetUnorderedAccessViews(0, std::size(uavs), uavs, nullptr);
	m_ctx->Dispatch(groups_x, groups_y, 1);

	// Unbind the views again so later passes do not hit resource hazards.
	srvs[0] = nullptr;
	uavs[0] = nullptr;
	m_ctx->CSSetShaderResources(0, std::size(srvs), srvs);
	m_ctx->CSSetUnorderedAccessViews(0, std::size(uavs), uavs, nullptr);

	return true;
}
|
|
|
|
bool GSDevice11::CreateImGuiResources()
|
|
{
|
|
HRESULT hr;
|
|
|
|
const std::optional<std::string> hlsl = ReadShaderSource("shaders/dx11/imgui.fx");
|
|
if (!hlsl.has_value())
|
|
{
|
|
Console.Error("D3D11: Failed to read imgui.fx");
|
|
return false;
|
|
}
|
|
|
|
// clang-format off
|
|
static constexpr D3D11_INPUT_ELEMENT_DESC layout[] =
|
|
{
|
|
{ "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 },
|
|
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(ImDrawVert, uv), D3D11_INPUT_PER_VERTEX_DATA, 0 },
|
|
{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)offsetof(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0 },
|
|
};
|
|
// clang-format on
|
|
|
|
if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_imgui.vs.put(), m_imgui.il.put(), layout,
|
|
std::size(layout), hlsl.value(), nullptr, "vs_main") ||
|
|
!(m_imgui.ps = m_shader_cache.GetPixelShader(m_dev.get(), hlsl.value(), nullptr, "ps_main")))
|
|
{
|
|
Console.Error("D3D11: Failed to compile ImGui shaders");
|
|
return false;
|
|
}
|
|
|
|
D3D11_BLEND_DESC blend_desc = {};
|
|
blend_desc.RenderTarget[0].BlendEnable = true;
|
|
blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
|
|
blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
|
|
blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
|
|
blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
|
|
blend_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA;
|
|
blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
|
|
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
|
|
hr = m_dev->CreateBlendState(&blend_desc, m_imgui.bs.put());
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D11: CreateImGuiResources(): CreateBlendState() failed: %08X", hr);
|
|
return false;
|
|
}
|
|
|
|
D3D11_BUFFER_DESC buffer_desc = {};
|
|
buffer_desc.Usage = D3D11_USAGE_DEFAULT;
|
|
buffer_desc.ByteWidth = sizeof(float) * 4 * 4;
|
|
buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
|
hr = m_dev->CreateBuffer(&buffer_desc, nullptr, m_imgui.vs_cb.put());
|
|
if (FAILED(hr))
|
|
{
|
|
Console.Error("D3D11: CreateImGuiResources(): CreateBlendState() failed: %08X", hr);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void GSDevice11::RenderImGui()
|
|
{
|
|
ImGui::Render();
|
|
const ImDrawData* draw_data = ImGui::GetDrawData();
|
|
if (draw_data->CmdListsCount == 0)
|
|
return;
|
|
|
|
UpdateImGuiTextures();
|
|
|
|
const float L = 0.0f;
|
|
const float R = static_cast<float>(m_window_info.surface_width);
|
|
const float T = 0.0f;
|
|
const float B = static_cast<float>(m_window_info.surface_height);
|
|
|
|
// clang-format off
|
|
const float ortho_projection[4][4] =
|
|
{
|
|
{ 2.0f/(R-L), 0.0f, 0.0f, 0.0f },
|
|
{ 0.0f, 2.0f/(T-B), 0.0f, 0.0f },
|
|
{ 0.0f, 0.0f, 0.5f, 0.0f },
|
|
{ (R+L)/(L-R), (T+B)/(B-T), 0.5f, 1.0f },
|
|
};
|
|
// clang-format on
|
|
|
|
m_ctx->UpdateSubresource(m_imgui.vs_cb.get(), 0, nullptr, ortho_projection, 0, 0);
|
|
|
|
const UINT vb_stride = sizeof(ImDrawVert);
|
|
const UINT vb_offset = 0;
|
|
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset);
|
|
IASetInputLayout(m_imgui.il.get());
|
|
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
|
|
PSSetShader(m_imgui.ps.get(), nullptr);
|
|
OMSetBlendState(m_imgui.bs.get(), 0.0f);
|
|
OMSetDepthStencilState(m_convert.dss.get(), 0);
|
|
PSSetSamplerState(m_convert.ln.get());
|
|
|
|
// Render command lists
|
|
for (int n = 0; n < draw_data->CmdListsCount; n++)
|
|
{
|
|
const ImDrawList* cmd_list = draw_data->CmdLists[n];
|
|
|
|
// This mess is because the vertex size isn't the same...
|
|
u32 vertex_offset;
|
|
{
|
|
static_assert(std::has_single_bit(sizeof(GSVertexPT1)));
|
|
|
|
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
|
|
|
const u32 unaligned_size = cmd_list->VtxBuffer.Size * sizeof(ImDrawVert);
|
|
u32 start_pos = Common::AlignUp(m_vb_pos, sizeof(ImDrawVert));
|
|
u32 end_pos = Common::AlignUpPow2(start_pos + unaligned_size, sizeof(GSVertexPT1));
|
|
if (end_pos > VERTEX_BUFFER_SIZE)
|
|
{
|
|
type = D3D11_MAP_WRITE_DISCARD;
|
|
m_vb_pos = 0;
|
|
start_pos = 0;
|
|
end_pos = Common::AlignUpPow2(unaligned_size, sizeof(GSVertexPT1));
|
|
}
|
|
|
|
m_vb_pos = end_pos;
|
|
vertex_offset = start_pos / sizeof(ImDrawVert);
|
|
|
|
D3D11_MAPPED_SUBRESOURCE sr;
|
|
const HRESULT hr = m_ctx->Map(m_vb.get(), 0, type, 0, &sr);
|
|
if (FAILED(hr))
|
|
continue;
|
|
|
|
std::memcpy(static_cast<u8*>(sr.pData) + start_pos, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert));
|
|
m_ctx->Unmap(m_vb.get(), 0);
|
|
}
|
|
|
|
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
|
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
|
|
|
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
|
{
|
|
const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
|
|
pxAssert(!pcmd->UserCallback);
|
|
|
|
const GSVector4 clip = GSVector4::load<false>(&pcmd->ClipRect);
|
|
if ((clip.zwzw() <= clip.xyxy()).mask() != 0)
|
|
continue;
|
|
|
|
const GSVector4i iclip = GSVector4i(clip);
|
|
if (!m_state.scissor.eq(iclip))
|
|
{
|
|
m_state.scissor = iclip;
|
|
m_ctx->RSSetScissorRects(1, reinterpret_cast<const D3D11_RECT*>(&iclip));
|
|
}
|
|
|
|
// Since we don't have the GSTexture...
|
|
m_state.ps_sr_views[0] = reinterpret_cast<ID3D11ShaderResourceView*>(pcmd->GetTexID());
|
|
PSUpdateShaderState(true, true);
|
|
|
|
m_ctx->DrawIndexed(pcmd->ElemCount, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
|
|
}
|
|
|
|
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
|
}
|
|
|
|
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
|
|
}
|
|
|
|
// Prepares the stencil buffer of `ds` for destination alpha testing: clears the stencil,
// then draws a quad covering `bbox` that samples `rt` with the shader selected by `datm`,
// using the m_date depth-stencil and blend states with only `ds` bound as a target.
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, SetDATM datm, const GSVector4i& bbox)
{
	// sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows

	CommitClear(rt);
	CommitClear(ds);

	m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(ds), D3D11_CLEAR_STENCIL, 0.0f, 0);

	// The depth target must not stay bound as a shader resource.
	PSUnbindConflictingSRVs(ds);

	// Output merger: depth/stencil only, no colour target.
	OMSetDepthStencilState(m_date.dss.get(), 1);
	OMSetBlendState(m_date.bs.get(), 0);
	OMSetRenderTargets(nullptr, ds);

	// Input assembler: bbox normalised to [0, 1] for texture coordinates and to [-1, 1]
	// for positions (Y negated when the vertices are built).
	const GSVector4 uv = GSVector4(bbox) / GSVector4(ds->GetSize()).xyxy();
	const GSVector4 pos = uv * 2.0f - 1.0f;

	const GSVertexPT1 quad[] =
	{
		{GSVector4(pos.x, -pos.y, 0.5f, 1.0f), GSVector2(uv.x, uv.y)},
		{GSVector4(pos.z, -pos.y, 0.5f, 1.0f), GSVector2(uv.z, uv.y)},
		{GSVector4(pos.x, -pos.w, 0.5f, 1.0f), GSVector2(uv.x, uv.w)},
		{GSVector4(pos.z, -pos.w, 0.5f, 1.0f), GSVector2(uv.z, uv.w)},
	};

	IASetVertexBuffer(quad, sizeof(quad[0]), 4);
	IASetInputLayout(m_convert.il.get());
	IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

	// Vertex shader.
	VSSetShader(m_convert.vs.get(), nullptr);

	// Pixel shader: sample the render target with point filtering.
	PSSetShaderResource(0, rt);
	PSSetSamplerState(m_convert.pt.get());
	PSSetShader(m_convert.ps[SetDATMShader(datm)].get(), nullptr);

	DrawPrimitive();
}
|
|
|
|
// Reserves room for `count` vertices of `stride` bytes in the streaming vertex buffer and
// maps it. Updates m_vertex.start (in vertices) and m_vb_pos (in bytes); when the request
// does not fit after the current position the buffer is discarded and writing restarts at 0.
// Returns a pointer to the reserved region, or nullptr if the request exceeds the buffer
// size or the map fails.
void* GSDevice11::IAMapVertexBuffer(u32 stride, u32 count)
{
	const u32 bytes = stride * count;
	if (bytes > VERTEX_BUFFER_SIZE)
		return nullptr;

	// Round the current byte position up to a whole vertex index for this stride.
	const u32 aligned_start = (m_vb_pos + (stride - 1)) / stride;
	const u32 aligned_end = (aligned_start * stride) + bytes;
	const bool wrapped = (aligned_end > VERTEX_BUFFER_SIZE);

	m_vertex.start = wrapped ? 0 : aligned_start;
	m_vb_pos = wrapped ? bytes : aligned_end;
	const D3D11_MAP map_type = wrapped ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;

	D3D11_MAPPED_SUBRESOURCE mapped;
	const HRESULT hr = m_ctx->Map(m_vb.get(), 0, map_type, 0, &mapped);
	if (FAILED(hr))
		return nullptr;

	return static_cast<u8*>(mapped.pData) + (m_vertex.start * stride);
}
|
|
|
|
// Unmaps the streaming vertex buffer, records the vertex count for the next draw, and
// rebinds the buffer if the stride differs from the one currently tracked in m_state.
void GSDevice11::IAUnmapVertexBuffer(u32 stride, u32 count)
{
	m_ctx->Unmap(m_vb.get(), 0);
	m_vertex.count = count;

	if (m_state.vb_stride == stride)
		return;

	m_state.vb_stride = stride;

	const UINT zero_offset = 0;
	m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &stride, &zero_offset);
}
|
|
|
|
bool GSDevice11::IASetVertexBuffer(const void* vertex, u32 stride, u32 count)
|
|
{
|
|
void* map = IAMapVertexBuffer(stride, count);
|
|
if (!map)
|
|
return false;
|
|
|
|
GSVector4i::storent(map, vertex, count * stride);
|
|
|
|
IAUnmapVertexBuffer(stride, count);
|
|
return true;
|
|
}
|
|
|
|
bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count)
|
|
{
|
|
const u32 size = stride * count;
|
|
if (size > VERTEX_BUFFER_SIZE)
|
|
return false;
|
|
|
|
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
|
|
|
m_vertex.start = (m_structured_vb_pos + (stride - 1)) / stride;
|
|
m_structured_vb_pos = (m_vertex.start * stride) + size;
|
|
if (m_structured_vb_pos > VERTEX_BUFFER_SIZE)
|
|
{
|
|
m_vertex.start = 0;
|
|
m_structured_vb_pos = size;
|
|
type = D3D11_MAP_WRITE_DISCARD;
|
|
}
|
|
|
|
D3D11_MAPPED_SUBRESOURCE m;
|
|
if (FAILED(m_ctx->Map(m_expand_vb.get(), 0, type, 0, &m)))
|
|
return false;
|
|
|
|
void* map = static_cast<u8*>(m.pData) + (m_vertex.start * stride);
|
|
|
|
GSVector4i::storent(map, vertex, count * stride);
|
|
|
|
m_ctx->Unmap(m_expand_vb.get(), 0);
|
|
|
|
m_vertex.count = count;
|
|
return true;
|
|
}
|
|
|
|
// Reserves room for `count` 16-bit indices in the streaming index buffer and maps it.
// Updates m_index.start and m_ib_pos (both in indices); when the request does not fit
// after the current position the buffer is discarded and writing restarts at 0.
// Returns nullptr if `count` exceeds the buffer capacity or the map fails.
u16* GSDevice11::IAMapIndexBuffer(u32 count)
{
	const size_t capacity = INDEX_BUFFER_SIZE / sizeof(u16);
	if (count > capacity)
		return nullptr;

	const u32 end_pos = m_ib_pos + count;
	const bool wrapped = (end_pos > capacity);

	m_index.start = wrapped ? 0 : m_ib_pos;
	m_ib_pos = wrapped ? count : end_pos;
	const D3D11_MAP map_type = wrapped ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;

	D3D11_MAPPED_SUBRESOURCE mapped;
	const HRESULT hr = m_ctx->Map(m_ib.get(), 0, map_type, 0, &mapped);
	if (FAILED(hr))
		return nullptr;

	return static_cast<u16*>(mapped.pData) + m_index.start;
}
|
|
|
|
// Records the index count for the next draw and unmaps the streaming index buffer.
void GSDevice11::IAUnmapIndexBuffer(u32 count)
{
	m_index.count = count;
	m_ctx->Unmap(m_ib.get(), 0);
}
|
|
|
|
bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
|
|
{
|
|
u16* map = IAMapIndexBuffer(count);
|
|
if (!map)
|
|
return false;
|
|
|
|
std::memcpy(map, index, count * sizeof(u16));
|
|
IAUnmapIndexBuffer(count);
|
|
IASetIndexBuffer(m_ib.get());
|
|
return true;
|
|
}
|
|
|
|
// Binds `buffer` as the 16-bit index buffer unless it is already the tracked binding.
void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
{
	if (m_state.index_buffer == buffer)
		return;

	m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R16_UINT, 0);
	m_state.index_buffer = buffer;
}
|
|
|
|
// Binds `layout` as the input layout unless it is already the tracked binding.
void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout)
{
	if (m_state.layout == layout)
		return;

	m_state.layout = layout;
	m_ctx->IASetInputLayout(layout);
}
|
|
|
|
// Sets the primitive topology unless it already matches the tracked value.
void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology)
{
	if (m_state.topology == topology)
		return;

	m_state.topology = topology;
	m_ctx->IASetPrimitiveTopology(topology);
}
|
|
|
|
// Binds the vertex shader and its slot-0 constant buffer, skipping whichever of the two
// already matches the tracked state.
void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb)
{
	const bool shader_differs = (m_state.vs != vs);
	if (shader_differs)
	{
		m_state.vs = vs;
		m_ctx->VSSetShader(vs, nullptr, 0);
	}

	const bool cb_differs = (m_state.vs_cb != vs_cb);
	if (cb_differs)
	{
		m_state.vs_cb = vs_cb;
		m_ctx->VSSetConstantBuffers(0, 1, &vs_cb);
	}
}
|
|
|
|
void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
|
{
|
|
m_state.ps_sr_views[i] = *static_cast<GSTexture11*>(sr);
|
|
}
|
|
|
|
// Stores `ss0` as the pending sampler for slot 0. Nothing is sent to the device context
// here; PSUpdateShaderState() applies the pending samplers.
void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0)
{
	ID3D11SamplerState* const sampler = ss0;
	m_state.ps_ss[0] = sampler;
}
|
|
|
|
void GSDevice11::ClearSamplerCache()
|
|
{
|
|
m_ps_ss.clear();
|
|
}
|
|
|
|
// Binds the pixel shader and its slot-0 constant buffer, skipping whichever of the two
// already matches the tracked state.
void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
{
	const bool shader_differs = (m_state.ps != ps);
	if (shader_differs)
	{
		m_state.ps = ps;
		m_ctx->PSSetShader(ps, nullptr, 0);
	}

	const bool cb_differs = (m_state.ps_cb != ps_cb);
	if (cb_differs)
	{
		m_state.ps_cb = ps_cb;
		m_ctx->PSSetConstantBuffers(0, 1, &ps_cb);
	}
}
|
|
|
|
void GSDevice11::PSUpdateShaderState(const bool sr_update, const bool ss_update)
|
|
{
|
|
// Shader resource caching requires srv/rtv hazards to be resolved, ensure PSUnbindConflictingSRVs handle.
|
|
if (sr_update)
|
|
{
|
|
bool sr_changed = false;
|
|
for (size_t i = 0; i < m_state.ps_sr_views.size(); ++i)
|
|
{
|
|
if (m_state.ps_cached_sr_views[i] != m_state.ps_sr_views[i])
|
|
{
|
|
sr_changed = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (sr_changed)
|
|
{
|
|
m_state.ps_cached_sr_views = m_state.ps_sr_views;
|
|
m_ctx->PSSetShaderResources(0, m_state.ps_sr_views.size(), m_state.ps_sr_views.data());
|
|
}
|
|
}
|
|
|
|
if (ss_update)
|
|
{
|
|
bool ss_changed = false;
|
|
for (size_t i = 0; i < m_state.ps_ss.size(); ++i)
|
|
{
|
|
if (m_state.ps_cached_ss[i] != m_state.ps_ss[i])
|
|
{
|
|
ss_changed = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (ss_changed)
|
|
{
|
|
m_state.ps_cached_ss = m_state.ps_ss;
|
|
m_ctx->PSSetSamplers(0, m_state.ps_ss.size(), m_state.ps_ss.data());
|
|
}
|
|
}
|
|
}
|
|
|
|
// Clears every pending pixel shader resource slot that refers to `tex1` or `tex2`
// (either may be null), so a texture is not bound as an SRV when it is about to be used
// as a render target. If any slot was cleared, the shader resources are re-applied.
void GSDevice11::PSUnbindConflictingSRVs(GSTexture* tex1, GSTexture* tex2)
{
	const auto conflicts = [&](size_t slot) {
		if (tex1 && m_state.ps_sr_views[slot] == *static_cast<GSTexture11*>(tex1))
			return true;
		return tex2 && m_state.ps_sr_views[slot] == *static_cast<GSTexture11*>(tex2);
	};

	bool any_cleared = false;
	for (size_t slot = 0; slot < m_state.ps_sr_views.size(); slot++)
	{
		if (!conflicts(slot))
			continue;

		m_state.ps_sr_views[slot] = nullptr;
		any_cleared = true;
	}

	if (any_cleared)
		PSUpdateShaderState(true, false);
}
|
|
|
|
// Binds the depth-stencil state and stencil reference. Skipped when the state object is
// unchanged and either it is null or the reference value also matches.
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref)
{
	const bool same_state = (m_state.dss == dss);
	if (same_state && (!dss || m_state.sref == sref))
		return;

	m_state.dss = dss;
	m_state.sref = sref;

	m_ctx->OMSetDepthStencilState(dss, sref);
}
|
|
|
|
// Binds the blend state with a blend factor of bf / 128 replicated across all four
// channels. Skipped when the state object is unchanged and either it is null or the
// factor also matches.
void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, u8 bf)
{
	const bool same_state = (m_state.bs == bs);
	if (same_state && (!bs || m_state.bf == bf))
		return;

	m_state.bs = bs;
	m_state.bf = bf;

	const GSVector4 blend_factor(static_cast<float>(bf) / 128.0f);
	m_ctx->OMSetBlendState(bs, blend_factor.v, 0xffffffff);
}
|
|
|
|
// Binds the colour target `rt` and depth target `ds` (either may be null).
// When `read_only_dsv` is given it is used as the depth view instead of the view of `ds`.
// The tracked views hold a COM reference, which is swapped when the view changes.
// If at least one texture is given, the viewport is set to its size and the scissor to
// `scissor`, or to the full size when `scissor` is null.
void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor, ID3D11DepthStencilView* read_only_dsv)
{
	ID3D11RenderTargetView* rtv = nullptr;
	if (rt)
	{
		CommitClear(rt);
		rtv = *static_cast<GSTexture11*>(rt);
	}

	ID3D11DepthStencilView* dsv = read_only_dsv;
	if (!read_only_dsv && ds)
	{
		CommitClear(ds);
		dsv = *static_cast<GSTexture11*>(ds);
	}

	const bool rtv_differs = (m_state.rt_view != rtv);
	const bool dsv_differs = (m_state.dsv != dsv);
	const bool changed = rtv_differs || dsv_differs;
	g_perfmon.Put(GSPerfMon::RenderPasses, static_cast<double>(changed));

	if (rtv_differs)
	{
		// Drop the reference to the old view and take one on the new view.
		if (m_state.rt_view)
			m_state.rt_view->Release();
		if (rtv)
			rtv->AddRef();

		m_state.rt_view = rtv;
		m_state.cached_rt_view = rt;
	}

	if (dsv_differs)
	{
		if (m_state.dsv)
			m_state.dsv->Release();
		if (dsv)
			dsv->AddRef();

		m_state.dsv = dsv;
		m_state.cached_dsv = ds;
	}

	if (changed)
		m_ctx->OMSetRenderTargets(1, &rtv, dsv);

	if (!rt && !ds)
		return;

	const GSVector2i size = rt ? rt->GetSize() : ds->GetSize();
	SetViewport(size);
	SetScissor(scissor ? *scissor : GSVector4i::loadh(size));
}
|
|
|
|
void GSDevice11::SetViewport(const GSVector2i& viewport)
|
|
{
|
|
if (m_state.viewport != viewport)
|
|
{
|
|
m_state.viewport = viewport;
|
|
|
|
const D3D11_VIEWPORT vp = {
|
|
0.0f, 0.0f, static_cast<float>(viewport.x), static_cast<float>(viewport.y), 0.0f, 1.0f};
|
|
m_ctx->RSSetViewports(1, &vp);
|
|
}
|
|
}
|
|
|
|
void GSDevice11::SetScissor(const GSVector4i& scissor)
|
|
{
|
|
static_assert(sizeof(D3D11_RECT) == sizeof(GSVector4i));
|
|
|
|
if (!m_state.scissor.eq(scissor))
|
|
{
|
|
m_state.scissor = scissor;
|
|
m_ctx->RSSetScissorRects(1, reinterpret_cast<const D3D11_RECT*>(&scissor));
|
|
}
|
|
}
|
|
|
|
void GSDevice11::ShaderMacro::AddMacro(const char* n, int d)
|
|
{
|
|
AddMacro(n, std::to_string(d));
|
|
}
|
|
|
|
void GSDevice11::ShaderMacro::AddMacro(const char* n, std::string d)
|
|
{
|
|
mlist.emplace_back(n, std::move(d));
|
|
}
|
|
|
|
// Rebuilds the output array from the macro list and returns a pointer to it.
// The array ends with a {nullptr, nullptr} entry. Entries point at the strings stored
// in mlist, and the returned pointer is mout's storage.
D3D_SHADER_MACRO* GSDevice11::ShaderMacro::GetPtr()
{
	mout.clear();

	for (auto& entry : mlist)
	{
		mout.emplace_back(entry.name.c_str(), entry.def.c_str());
	}

	// Terminating entry.
	mout.emplace_back(nullptr, nullptr);

	return (D3D_SHADER_MACRO*)mout.data();
}
|
|
|
|
// Executes one hardware draw described by `config`.
// In order: colour clip (colclip) target handling, destination alpha setup (PrimID
// tracking or stencil), vertex/index upload, shader and output-merger setup, the main
// draw, the optional blend multi-pass and alpha second pass, and finally recycling of
// temporary textures and colclip resolve.
// `config` is modified along the way (pixel shader selector bits, colclip_update_area,
// cb_vs.max_depth.y, cb_ps.FogColor_AREF.a and, in Depth destination-alpha mode, rt).
void GSDevice11::RenderHW(GSHWDrawConfig& config)
{
	const GSVector2i rtsize = (config.rt ? config.rt : config.ds)->GetSize();
	GSTexture* colclip_rt = g_gs_device->GetColorClipTexture();

	if (colclip_rt)
	{
		if (config.colclip_mode == GSHWDrawConfig::ColClipMode::EarlyResolve)
		{
			// Resolve the existing colclip target back into the RT before drawing, then drop it.
			const GSVector2i size = config.rt->GetSize();
			const GSVector4 dRect(config.colclip_update_area);
			const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
			StretchRect(colclip_rt, sRect, config.rt, dRect, ShaderConvert::COLCLIP_RESOLVE, false);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
			Recycle(colclip_rt);

			g_gs_device->SetColorClipTexture(nullptr);

			colclip_rt = nullptr;
		}
		else
			config.ps.colclip_hw = 1;
	}

	if (config.ps.colclip_hw)
	{
		if (!colclip_rt)
		{
			config.colclip_update_area = config.drawarea;

			colclip_rt = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::ColorClip, false);
			if (!colclip_rt)
			{
				Console.Warning("D3D11: Failed to allocate ColorClip render target, aborting draw.");
				return;
			}

			g_gs_device->SetColorClipTexture(colclip_rt);

			// Initialise the new colclip target from the RT.
			const GSVector4 dRect = GSVector4((config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertOnly) ? GSVector4i::loadh(rtsize) : config.drawarea);
			const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
			StretchRect(config.rt, sRect, colclip_rt, dRect, ShaderConvert::COLCLIP_INIT, false);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
		}
	}

	// Destination Alpha Setup
	const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
	GSTexture* primid_texture = nullptr;

	// Hands the temporary PrimID target back to the pool when the draw is aborted after it
	// was allocated; without this the early returns below would leak it.
	const auto recycle_primid_on_abort = [this, &primid_texture]() {
		if (primid_texture)
		{
			Recycle(primid_texture);
			primid_texture = nullptr;
		}
	};

	if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
	{
		primid_texture = CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::PrimID, false);
		if (!primid_texture)
		{
			Console.Warning("D3D11: Failed to allocate DATE image, aborting draw.");
			return;
		}

		DoStretchRect(colclip_rt ? colclip_rt : config.rt, GSVector4(config.drawarea) / GSVector4(rtsize).xyxy(),
			primid_texture, GSVector4(config.drawarea), m_date.primid_init_ps[static_cast<u8>(config.datm)].get(), nullptr, false);
	}
	else if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Stencil ||
			 (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && !multidraw_fb_copy))
		SetupDATE(colclip_rt ? colclip_rt : config.rt, config.ds, config.datm, config.drawarea);

	if (config.vs.expand != GSHWDrawConfig::VSExpand::None)
	{
		if (!IASetExpandVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
		{
			Console.Error("D3D11: Failed to upload structured vertices (%u)", config.nverts);
			recycle_primid_on_abort();
			return;
		}

		// The first vertex index in the expand buffer is passed through the VS constants.
		config.cb_vs.max_depth.y = m_vertex.start;
	}
	else
	{
		if (!IASetVertexBuffer(config.verts, sizeof(*config.verts), config.nverts))
		{
			Console.Error("D3D11: Failed to upload vertices (%u)", config.nverts);
			recycle_primid_on_abort();
			return;
		}
	}

	if (config.vs.UseExpandIndexBuffer())
	{
		IASetIndexBuffer(m_expand_ib.get());
		m_index.start = 0;
		m_index.count = config.nindices;
	}
	else
	{
		if (!IASetIndexBuffer(config.indices, config.nindices))
		{
			Console.Error("D3D11: Failed to upload indices (%u)", config.nindices);
			recycle_primid_on_abort();
			return;
		}
	}

	D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
	switch (config.topology)
	{
		case GSHWDrawConfig::Topology::Point:    topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;    break;
		case GSHWDrawConfig::Topology::Line:     topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;     break;
		case GSHWDrawConfig::Topology::Triangle: topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
	}
	IASetPrimitiveTopology(topology);

	// Depth testing and sampling, bind resource as dsv read only and srv at the same time without the need of a copy.
	ID3D11DepthStencilView* read_only_dsv = nullptr;
	if (config.tex && config.tex == config.ds)
		read_only_dsv = static_cast<GSTexture11*>(config.ds)->ReadOnlyDepthStencilView();

	// Should be called before changing local srv state.
	PSUnbindConflictingSRVs(colclip_rt ? colclip_rt : config.rt, read_only_dsv ? nullptr : config.ds);

	if (config.tex)
	{
		CommitClear(config.tex);
		PSSetShaderResource(0, config.tex);
	}
	if (config.pal)
	{
		CommitClear(config.pal);
		PSSetShaderResource(1, config.pal);
	}

	SetupVS(config.vs, &config.cb_vs);
	SetupPS(config.ps, &config.cb_ps, config.sampler);

	if (primid_texture)
	{
		// First pass: draw into the PrimID target with depth writes off and MIN blending.
		OMDepthStencilSelector dss = config.depth;
		dss.zwe = 0;
		const OMBlendSelector blend(GSHWDrawConfig::ColorMaskSelector(1),
			GSHWDrawConfig::BlendState(true, CONST_ONE, CONST_ONE, 3 /* MIN */, CONST_ONE, CONST_ZERO, false, 0));
		SetupOM(dss, blend, 0);
		OMSetRenderTargets(primid_texture, config.ds, &config.scissor, read_only_dsv);
		DrawIndexedPrimitive();

		// Later passes read the PrimID texture from slot 3.
		config.ps.date = 3;
		config.alpha_second_pass.ps.date = 3;
		SetupPS(config.ps, nullptr, config.sampler);
		PSSetShaderResource(3, primid_texture);
	}
	else if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::Depth)
	{
		PSSetShaderResource(2, config.rt);
		config.rt = (config.rt == m_state.cached_rt_view) ? nullptr : m_state.cached_rt_view;
		OMSetRenderTargets(config.rt, config.ds, &config.scissor, read_only_dsv);
	}

	// Avoid changing framebuffer just to switch from rt+depth to rt and vice versa.
	GSTexture* draw_rt = colclip_rt ? colclip_rt : config.rt;
	GSTexture* draw_ds = config.ds;
	// Make sure no tex is bound as both rtv and srv at the same time.
	// All conflicts should've been taken care of by PSUnbindConflictingSRVs.
	// It is fine to do the optimization when on slot 0 tex is fb, tex is ds, and slot 2 sw blend as they are copies bound to srv.
	if (!draw_rt && draw_ds && m_state.rt_view && m_state.cached_rt_view && m_state.rt_view == *static_cast<GSTexture11*>(m_state.cached_rt_view) &&
		m_state.cached_dsv == draw_ds && config.tex != m_state.cached_rt_view && m_state.cached_rt_view->GetSize() == draw_ds->GetSize())
	{
		draw_rt = m_state.cached_rt_view;
	}
	else if (!draw_ds && draw_rt && m_state.dsv && m_state.cached_dsv && m_state.dsv == *static_cast<GSTexture11*>(m_state.cached_dsv) &&
			 m_state.cached_rt_view == draw_rt && config.tex != m_state.cached_dsv && m_state.cached_dsv->GetSize() == draw_rt->GetSize())
	{
		draw_ds = m_state.cached_dsv;
	}

	GSTexture* draw_rt_clone = nullptr;

	if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)))
	{
		// Requires a copy of the RT.
		// Used as "bind rt" flag when texture barrier is unsupported for tex is fb.
		draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true);

		if (!draw_rt_clone)
			Console.Warning("D3D11: Failed to allocate temp texture for RT copy.");
	}

	OMSetRenderTargets(draw_rt, draw_ds, &config.scissor, read_only_dsv);
	SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), config.blend.constant);

	// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
	if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && multidraw_fb_copy)
		m_ctx->ClearDepthStencilView(*static_cast<GSTexture11*>(draw_ds), D3D11_CLEAR_STENCIL, 0.0f, 1);

	SendHWDraw(config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier, false);

	if (config.blend_multi_pass.enable)
	{
		config.ps.no_color1 = config.blend_multi_pass.no_color1;
		config.ps.blend_hw = config.blend_multi_pass.blend_hw;
		config.ps.dither = config.blend_multi_pass.dither;
		SetupPS(config.ps, &config.cb_ps, config.sampler);
		SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend_multi_pass.blend), config.blend_multi_pass.blend.constant);
		DrawIndexedPrimitive();
	}

	if (config.alpha_second_pass.enable)
	{
		if (config.cb_ps.FogColor_AREF.a != config.alpha_second_pass.ps_aref)
		{
			config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref;
			SetupPS(config.alpha_second_pass.ps, &config.cb_ps, config.sampler);
		}
		else
		{
			// ps cbuffer hasn't changed, so don't bother checking
			SetupPS(config.alpha_second_pass.ps, nullptr, config.sampler);
		}

		SetupOM(config.alpha_second_pass.depth, OMBlendSelector(config.alpha_second_pass.colormask, config.blend), config.blend.constant);
		SendHWDraw(config, draw_rt_clone, draw_rt, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier, true);
	}

	if (draw_rt_clone)
		Recycle(draw_rt_clone);

	if (primid_texture)
		Recycle(primid_texture);

	if (colclip_rt)
	{
		config.colclip_update_area = config.colclip_update_area.runion(config.drawarea);

		if (config.colclip_mode == GSHWDrawConfig::ColClipMode::ResolveOnly || config.colclip_mode == GSHWDrawConfig::ColClipMode::ConvertAndResolve)
		{
			// Resolve the accumulated colclip area back into the RT and release the colclip target.
			const GSVector2i size = config.rt->GetSize();
			const GSVector4 dRect(config.colclip_update_area);
			const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
			StretchRect(colclip_rt, sRect, config.rt, dRect, ShaderConvert::COLCLIP_RESOLVE, false);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
			Recycle(colclip_rt);

			g_gs_device->SetColorClipTexture(nullptr);
		}
	}
}
|
|
|
|
void GSDevice11::SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, const bool one_barrier, const bool full_barrier, const bool skip_first_barrier)
|
|
{
|
|
if (draw_rt_clone)
|
|
{
|
|
#ifdef PCSX2_DEVBUILD
|
|
if ((one_barrier || full_barrier) && !config.ps.IsFeedbackLoop()) [[unlikely]]
|
|
Console.Warning("D3D11: Possible unnecessary copy detected.");
|
|
#endif
|
|
|
|
auto CopyAndBind = [&](GSVector4i drawarea) {
|
|
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
|
|
if (one_barrier || full_barrier)
|
|
PSSetShaderResource(2, draw_rt_clone);
|
|
if (config.tex && config.tex == config.rt)
|
|
PSSetShaderResource(0, draw_rt_clone);
|
|
};
|
|
|
|
if (m_features.multidraw_fb_copy && full_barrier)
|
|
{
|
|
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
|
|
const u32 indices_per_prim = config.indices_per_prim;
|
|
|
|
pxAssert(config.drawlist && !config.drawlist->empty());
|
|
pxAssert(config.drawlist_bbox && static_cast<u32>(config.drawlist_bbox->size()) == draw_list_size);
|
|
|
|
for (u32 n = 0, p = 0; n < draw_list_size; n++)
|
|
{
|
|
const u32 count = (*config.drawlist)[n] * indices_per_prim;
|
|
|
|
GSVector4i bbox = (*config.drawlist_bbox)[n].rintersect(config.drawarea);
|
|
|
|
// Copy only the part needed by the draw.
|
|
CopyAndBind(bbox);
|
|
|
|
DrawIndexedPrimitive(p, count);
|
|
p += count;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// Optimization: For alpha second pass we can reuse the copy snapshot from the first pass.
|
|
if (!skip_first_barrier)
|
|
CopyAndBind(config.drawarea);
|
|
}
|
|
|
|
DrawIndexedPrimitive();
|
|
}
|