Compare commits

...

19 Commits

Author SHA1 Message Date
refractionpcsx2
3ae707464c GS/HW: Ignore point draws for a single black pixel in the top left 2026-01-26 03:19:49 +01:00
refractionpcsx2
00ef419023 GS/TC: Enhance depth clear behaviour to improve heuristics 2026-01-26 03:19:49 +01:00
refractionpcsx2
448a279cd4 GS/HW: Further improve clear behaviour 2026-01-26 03:19:49 +01:00
refractionpcsx2
70e13adfde GS/TC: Improve wrapping behaviour for inside target lookups 2026-01-26 03:19:49 +01:00
refractionpcsx2
25bc280818 GS/HW: Clear GS memory if not zero or target overwrite 2026-01-26 03:19:49 +01:00
TheLastRar
32e073002a [ci skip] CI/Linux: Fix contribute link 2026-01-25 19:55:48 +01:00
PCSX2 Bot
911314e948 [ci skip] Qt: Update Base Translation. 2026-01-25 02:38:54 +01:00
lightningterror
a73fcb343c GS: Default to DX12 on NV/AMD.
DX12 trades blows with Vulkan on AMD depending on cpu usage and will be stable on RDNA 3 so let's default to it.

NVIDIA: 590 drivers on Nvidia are bad causing performance regressions so let's switch to DX12 as the default.
2026-01-24 20:28:04 +01:00
TheLastRar
251b2960f8 Revert "GS/VK: Use the compute queues for present" 2026-01-24 20:27:30 +01:00
refractionpcsx2
5bb99105c3 GS/HW: Clear downscale source on draw to avoid cross game corruption 2026-01-24 18:32:43 +01:00
lightningterror
fa6e1b0949 GS/DX12: Don't end render passes on enhanced barriers. 2026-01-23 11:19:11 +01:00
oltolm
4297918ce2 cmake: remove Qt6::GuiPrivate dependency on WIN32 and APPLE 2026-01-21 18:56:00 -05:00
PCSX2 Bot
629a58469b [ci skip] Qt: Update Base Translation. 2026-01-21 09:06:57 +01:00
TheLastRar
5ff1eed28c Deps: Add DX12 Agility SDK 2026-01-21 09:06:44 +01:00
TheLastRar
4506ff1c46 GS/DX12: Use Enhanced Barriers API 2026-01-21 09:06:44 +01:00
TheLastRar
4daa455524 GS/VK: Fix the Nvidia present fix 2026-01-20 21:54:14 +01:00
TheLastRar
433e99baec GS/VK: Hackfix impatient present on Nvidia 2026-01-20 08:03:00 +01:00
Ty
a1ac6662d3 QT: Focus the main window when the DisplayWidget is focused to get its toolbar 2026-01-19 09:31:55 -05:00
JordanTheToaster
87366cda9d 3rdparty: Update rcheevos to 12.2.1 2026-01-18 10:35:14 -05:00
24 changed files with 1126 additions and 422 deletions

View File

@@ -20,7 +20,7 @@
<url type="donation">https://github.com/sponsors/PCSX2</url>
<url type="faq">https://pcsx2.net/docs/</url>
<url type="help">https://pcsx2.net/discord</url>
<url type="contribute">https://github.com/PCSX2/pcsx2/blob/master/.github/CONTRIBUTING.md</url>
<url type="contribute">https://pcsx2.net/docs/category/contributing</url>
<url type="translate">https://crowdin.com/project/pcsx2-emulator</url>
<url type="contact">https://mastodon.social/@PCSX2</url>
<screenshots>

View File

@@ -65,6 +65,8 @@ set SHADERC_GLSLANG=7a47e2531cb334982b2a2dd8513dca0a3de4373d
set SHADERC_SPIRVHEADERS=b824a462d4256d720bebb40e78b9eb8f78bbb305
set SHADERC_SPIRVTOOLS=971a7b6e8d7740035bbff089bbbf9f42951ecfd5
set AGILITYSDK=1.618.5
call :downloadfile "freetype-%FREETYPE%.tar.gz" https://sourceforge.net/projects/freetype/files/freetype2/%FREETYPE%/freetype-%FREETYPE%.tar.gz/download 174d9e53402e1bf9ec7277e22ec199ba3e55a6be2c0740cb18c0ee9850fc8c34 || goto error
call :downloadfile "harfbuzz-%HARFBUZZ%.zip" https://github.com/harfbuzz/harfbuzz/archive/refs/tags/%HARFBUZZ%.zip 31490c781bacd2ce56862555b11c51c964977c39f14f51b817dfaecf0be089fe || goto error
call :downloadfile "lpng%LIBPNG%.zip" https://download.sourceforge.net/libpng/lpng1653.zip 140566abc64bb2320cb35f1d154d1cb3eb7174a12234d33bfdffb446bdc0a1d2 || goto error
@@ -84,6 +86,7 @@ call :downloadfile "zstd-%ZSTD%.zip" "https://github.com/facebook/zstd/archive/r
call :downloadfile "KDDockWidgets-%KDDOCKWIDGETS%.zip" "https://github.com/KDAB/KDDockWidgets/archive/v%KDDOCKWIDGETS%.zip" 47ddb48197872055f0adf8e90a7235f8a3b795ca1ee3a28ac2c504c673ae3806 || goto error
call :downloadfile "plutovg-%PLUTOVG%.zip" "https://github.com/sammycage/plutovg/archive/v%PLUTOVG%.zip" 4fe4e48f28aa80171b2166d45c0976ab0f21eecedb52cd4c3ef73b5afb48fac9 || goto error
call :downloadfile "plutosvg-%PLUTOSVG%.zip" "https://github.com/sammycage/plutosvg/archive/v%PLUTOSVG%.zip" 82dee2c57ad712bdd6d6d81d3e76249d89caa4b5a4214353660fd5adff12201a || goto error
rem Agility SDK redistributable, fetched as a NuGet package and unpacked later in this script.
call :downloadfile "agility-sdk-%AGILITYSDK%.nupkg" "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/%AGILITYSDK%" 0027fc24f947c48dbded13ada7d280be221eb651644e23a8a476f0f1f0a079dd || goto error
call :downloadfile "shaderc-%SHADERC%.zip" "https://github.com/google/shaderc/archive/refs/tags/v%SHADERC%.zip" fab72d1a38eacea52710d18edb95dfd75db894ad869675d07a1eb26827da9b15 || goto error
call :downloadfile "shaderc-glslang-%SHADERC_GLSLANG%.zip" "https://github.com/KhronosGroup/glslang/archive/%SHADERC_GLSLANG%.zip" 4a118247386ffba9160113f146f2189ba5abe3995db357114d7112ede6bd3cd1 || goto error
@@ -305,6 +308,20 @@ cmake --build build --parallel || goto error
ninja -C build install || goto error
cd .. || goto error
echo Unpacking Agility SDK
rmdir /S /Q "agility-sdk-%AGILITYSDK%"
%SEVENZIP% x -o"agility-sdk-%AGILITYSDK%" "agility-sdk-%AGILITYSDK%.nupkg" || goto error
cd "agility-sdk-%AGILITYSDK%" || goto error
if not exist "%INSTALLDIR%\bin\D3D12" (
mkdir "%INSTALLDIR%\bin\D3D12" || goto error
)
rem the pdbs aren't in the list of distributable files, so only copy the dlls.
copy "build\native\bin\arm64\D3D12Core.dll" "%INSTALLDIR%\bin\D3D12\D3D12Core.dll" || goto error
if %DEBUG%==1 (
copy "build\native\bin\arm64\d3d12SDKLayers.dll" "%INSTALLDIR%\bin\D3D12\d3d12SDKLayers.dll" || goto error
)
cd .. || goto error
echo Building shaderc...
rmdir /S /Q "shaderc-%SHADERC%"
%SEVENZIP% x "shaderc-%SHADERC%.zip" || goto error

View File

@@ -63,6 +63,8 @@ set SHADERC_GLSLANG=7a47e2531cb334982b2a2dd8513dca0a3de4373d
set SHADERC_SPIRVHEADERS=b824a462d4256d720bebb40e78b9eb8f78bbb305
set SHADERC_SPIRVTOOLS=971a7b6e8d7740035bbff089bbbf9f42951ecfd5
set AGILITYSDK=1.618.5
call :downloadfile "freetype-%FREETYPE%.tar.gz" https://sourceforge.net/projects/freetype/files/freetype2/%FREETYPE%/freetype-%FREETYPE%.tar.gz/download 174d9e53402e1bf9ec7277e22ec199ba3e55a6be2c0740cb18c0ee9850fc8c34 || goto error
call :downloadfile "harfbuzz-%HARFBUZZ%.zip" https://github.com/harfbuzz/harfbuzz/archive/refs/tags/%HARFBUZZ%.zip 31490c781bacd2ce56862555b11c51c964977c39f14f51b817dfaecf0be089fe || goto error
call :downloadfile "lpng%LIBPNG%.zip" https://download.sourceforge.net/libpng/lpng1653.zip 140566abc64bb2320cb35f1d154d1cb3eb7174a12234d33bfdffb446bdc0a1d2 || goto error
@@ -82,6 +84,7 @@ call :downloadfile "zstd-%ZSTD%.zip" "https://github.com/facebook/zstd/archive/r
call :downloadfile "KDDockWidgets-%KDDOCKWIDGETS%.zip" "https://github.com/KDAB/KDDockWidgets/archive/v%KDDOCKWIDGETS%.zip" 47ddb48197872055f0adf8e90a7235f8a3b795ca1ee3a28ac2c504c673ae3806 || goto error
call :downloadfile "plutovg-%PLUTOVG%.zip" "https://github.com/sammycage/plutovg/archive/v%PLUTOVG%.zip" 4fe4e48f28aa80171b2166d45c0976ab0f21eecedb52cd4c3ef73b5afb48fac9 || goto error
call :downloadfile "plutosvg-%PLUTOSVG%.zip" "https://github.com/sammycage/plutosvg/archive/v%PLUTOSVG%.zip" 82dee2c57ad712bdd6d6d81d3e76249d89caa4b5a4214353660fd5adff12201a || goto error
rem Agility SDK redistributable, fetched as a NuGet package and unpacked later in this script.
call :downloadfile "agility-sdk-%AGILITYSDK%.nupkg" "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/%AGILITYSDK%" 0027fc24f947c48dbded13ada7d280be221eb651644e23a8a476f0f1f0a079dd || goto error
call :downloadfile "shaderc-%SHADERC%.zip" "https://github.com/google/shaderc/archive/refs/tags/v%SHADERC%.zip" fab72d1a38eacea52710d18edb95dfd75db894ad869675d07a1eb26827da9b15 || goto error
call :downloadfile "shaderc-glslang-%SHADERC_GLSLANG%.zip" "https://github.com/KhronosGroup/glslang/archive/%SHADERC_GLSLANG%.zip" 4a118247386ffba9160113f146f2189ba5abe3995db357114d7112ede6bd3cd1 || goto error
@@ -302,6 +305,20 @@ cmake --build build --parallel || goto error
ninja -C build install || goto error
cd .. || goto error
echo Unpacking Agility SDK
rmdir /S /Q "agility-sdk-%AGILITYSDK%"
%SEVENZIP% x -o"agility-sdk-%AGILITYSDK%" "agility-sdk-%AGILITYSDK%.nupkg" || goto error
cd "agility-sdk-%AGILITYSDK%" || goto error
if not exist "%INSTALLDIR%\bin\D3D12" (
mkdir "%INSTALLDIR%\bin\D3D12" || goto error
)
rem the pdbs aren't in the list of distributable files, so only copy the dlls.
copy "build\native\bin\x64\D3D12Core.dll" "%INSTALLDIR%\bin\D3D12\D3D12Core.dll" || goto error
if %DEBUG%==1 (
copy "build\native\bin\x64\d3d12SDKLayers.dll" "%INSTALLDIR%\bin\D3D12\d3d12SDKLayers.dll" || goto error
)
cd .. || goto error
echo Building shaderc...
rmdir /S /Q "shaderc-%SHADERC%"
%SEVENZIP% x "shaderc-%SHADERC%.zip" || goto error

View File

@@ -1,3 +1,7 @@
# v12.2.1
* fix parsing of leaderboards with comparisons in legacy-formatted values
* fix validation warning on long AddSource chains
# v12.2.0
* add rc_client_create_subset_list
* add rc_client_begin_fetch_game_titles

View File

@@ -9,7 +9,7 @@ RC_BEGIN_C_DECLS
#define RCHEEVOS_VERSION_MAJOR 12
#define RCHEEVOS_VERSION_MINOR 2
#define RCHEEVOS_VERSION_PATCH 0
#define RCHEEVOS_VERSION_PATCH 1
#define RCHEEVOS_MAKE_VERSION(major, minor, patch) (major * 1000000 + minor * 1000 + patch)
#define RCHEEVOS_VERSION RCHEEVOS_MAKE_VERSION(RCHEEVOS_VERSION_MAJOR, RCHEEVOS_VERSION_MINOR, RCHEEVOS_VERSION_PATCH)

View File

@@ -510,6 +510,7 @@ static void rc_combine_ranges(uint32_t* min_val, uint32_t* max_val, uint8_t oper
break;
case RC_OPERATOR_ADD:
case RC_OPERATOR_ADD_ACCUMULATOR:
if (*min_val > *max_val) { /* underflow occurred */
*max_val += oper_max_val;
}
@@ -522,6 +523,7 @@ static void rc_combine_ranges(uint32_t* min_val, uint32_t* max_val, uint8_t oper
break;
case RC_OPERATOR_SUB:
case RC_OPERATOR_SUB_ACCUMULATOR:
*min_val -= oper_max_val;
*max_val -= oper_min_val;
break;

View File

@@ -180,9 +180,11 @@ static void rc_parse_legacy_value(rc_value_t* self, const char** memaddr, rc_par
return;
}
if (!rc_operator_is_modifying(cond->oper)) {
parse->offset = RC_INVALID_OPERATOR;
return;
if (cond->type == RC_CONDITION_MEASURED && !rc_operator_is_modifying(cond->oper)) {
/* ignore non-modifying operator on measured clause. if it were parsed as an AddSource
* or SubSource, that would have already happened in rc_parse_condition_internal, and
* legacy formatted values are essentially a series of AddSources. */
cond->oper = RC_OPERATOR_NONE;
}
rc_condition_update_parse_state(cond, parse);

View File

@@ -110,8 +110,10 @@ disable_compiler_warnings_for_target(speex)
if(ENABLE_QT_UI)
find_package(Qt6 6.10.0 COMPONENTS CoreTools Core GuiTools Gui WidgetsTools Widgets LinguistTools REQUIRED)
if (Qt6_VERSION VERSION_GREATER_EQUAL 6.10.0)
find_package(Qt6 COMPONENTS CorePrivate GuiPrivate WidgetsPrivate REQUIRED)
if(NOT WIN32 AND NOT APPLE)
if (Qt6_VERSION VERSION_GREATER_EQUAL 6.10.0)
find_package(Qt6 COMPONENTS CorePrivate GuiPrivate WidgetsPrivate REQUIRED)
endif()
endif()
# The docking system for the debugger.

View File

@@ -28,6 +28,10 @@
<DepsDLLs Include="$(DepsBinDir)plutovg.dll" />
<DepsDLLs Include="$(DepsBinDir)plutosvg.dll" />
</ItemGroup>
<ItemGroup>
<AgilityDLLs Condition="Exists('$(DepsBinDir)D3D12\D3D12Core.dll')" Include="$(DepsBinDir)D3D12\D3D12Core.dll" />
<AgilityDLLs Condition="Exists('$(DepsBinDir)D3D12\d3d12SDKLayers.dll')" Include="$(DepsBinDir)D3D12\d3d12SDKLayers.dll" />
</ItemGroup>
<Target Name="DepsListPDBs"
AfterTargets="Build">
<ItemGroup>
@@ -56,4 +60,15 @@
SkipUnchangedFiles="true"
/>
</Target>
<Target Name="DepsCopyAgilityDLLs"
AfterTargets="Build"
Inputs="@(AgilityDLLs)"
Outputs="@(AgilityDLLs -> '$(OutDir)D3D12\%(RecursiveDir)%(Filename)%(Extension)')">
<Message Text="Copying Agility SDK DLLs" Importance="High" />
<Copy
SourceFiles="@(AgilityDLLs)"
DestinationFolder="$(OutDir)D3D12"
SkipUnchangedFiles="true"
/>
</Target>
</Project>

View File

@@ -289,11 +289,14 @@ target_link_libraries(pcsx2-qt PRIVATE
PCSX2
Qt6::Core
Qt6::Gui
Qt6::GuiPrivate
Qt6::Widgets
KDAB::kddockwidgets
)
if(NOT WIN32 AND NOT APPLE)
target_link_libraries(pcsx2-qt PRIVATE Qt6::GuiPrivate)
endif()
# Our Qt builds may have exceptions on, so force them off.
target_compile_definitions(pcsx2-qt PRIVATE QT_NO_EXCEPTIONS)

View File

@@ -26,8 +26,6 @@
#if defined(_WIN32)
#include "common/RedtapeWindows.h"
#elif !defined(APPLE)
#include <qpa/qplatformnativeinterface.h>
#endif
DisplaySurface::DisplaySurface()
@@ -477,6 +475,13 @@ bool DisplaySurface::eventFilter(QObject* object, QEvent* event)
}
return false;
case QEvent::FocusIn:
// macOS: When we (the display window) get focus from another window with a toolbar we update to the MainWindow toolbar.
// This is because we are a different native window from our MainWindow. So, whenever we get focus, focus our MainWindow.
// That way macOS will show the MainWindow toolbar when you click from the debugger / log window to the game.
if (auto* w = qobject_cast<QWidget*>(object))
w->window()->activateWindow();
return false;
default:
return false;
}

View File

@@ -11888,12 +11888,12 @@ This action cannot be undone.</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2772"/>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2713"/>
<source>Stencil buffers and texture barriers are both unavailable, this will break some graphical effects.</source>
<translation type="unfinished"></translation>
</message>
<message>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="5114"/>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="5055"/>
<source>Spin GPU During Readbacks is enabled, but calibrated timestamps are unavailable. This might be really slow.</source>
<translation type="unfinished"></translation>
</message>
@@ -11995,7 +11995,7 @@ Please see our official documentation for more information.</source>
<context>
<name>GSDeviceVK</name>
<message>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2137"/>
<location filename="../../pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp" line="2078"/>
<source>Your GPU does not support the required Vulkan features.</source>
<translation type="unfinished"></translation>
</message>

View File

@@ -1308,6 +1308,12 @@ function(setup_main_executable target)
install(FILES "${DEPS_BINDIR}/${SYMBOL_TO_COPY}" DESTINATION "${CMAKE_SOURCE_DIR}/bin" OPTIONAL)
endforeach()
set(AGILITY_DIR "${CMAKE_SOURCE_DIR}/deps/bin/D3D12")
set(AGILITY_DEPS_TO_COPY D3D12Core.dll d3d12SDKLayers.dll)
foreach(AGILITY_DEP_TO_COPY ${AGILITY_DEPS_TO_COPY})
install(FILES "${DEPS_BINDIR}/D3D12/${AGILITY_DEP_TO_COPY}" DESTINATION "${CMAKE_SOURCE_DIR}/bin/D3D12" OPTIONAL)
endforeach()
get_target_property(WINDEPLOYQT_EXE Qt6::windeployqt IMPORTED_LOCATION)
install(CODE "execute_process(COMMAND \"${WINDEPLOYQT_EXE}\" \"${CMAKE_SOURCE_DIR}/bin/$<TARGET_FILE_NAME:${target}>\" --plugindir \"${CMAKE_SOURCE_DIR}/bin/QtPlugins\" --pdb --no-compiler-runtime --no-system-d3d-compiler --no-system-dxc-compiler --no-translations COMMAND_ERROR_IS_FATAL ANY)")
install(CODE "file(WRITE \"${CMAKE_SOURCE_DIR}/bin/qt.conf\" \"[Paths]\\nPlugins = ./QtPlugins\")")

View File

@@ -5496,6 +5496,11 @@ bool GSState::IsOpaque()
return true;
const GSDrawingContext* context = m_context;
const u32 fmsk = GSLocalMemory::m_psm[context->FRAME.PSM].fmsk;
// If we aren't drawing color, it's equivalent to opaque.
if ((context->FRAME.FBMSK & fmsk) == (fmsk & 0x00FFFFFF))
return true;
int amin = 0;
int amax = 0xff;

View File

@@ -426,7 +426,8 @@ GSRendererType D3D::GetPreferredRenderer()
if (!feature_level.has_value())
return GSRendererType::DX11;
else if (feature_level == D3D_FEATURE_LEVEL_12_0)
return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::OGL;
//return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::OGL;
return GSRendererType::DX12;
else if (feature_level == D3D_FEATURE_LEVEL_11_0)
return GSRendererType::OGL;
else
@@ -439,7 +440,8 @@ GSRendererType D3D::GetPreferredRenderer()
if (!feature_level.has_value())
return GSRendererType::DX11;
else if (feature_level == D3D_FEATURE_LEVEL_12_0)
return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::DX12;
//return check_vulkan_supported() ? GSRendererType::VK : GSRendererType::DX12;
return GSRendererType::DX12;
else if (feature_level == D3D_FEATURE_LEVEL_11_1)
return GSRendererType::DX12;
else

View File

@@ -22,8 +22,8 @@ D3D12StreamBuffer::~D3D12StreamBuffer()
bool D3D12StreamBuffer::Create(u32 size)
{
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN,
{1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
const GSDevice12::D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN,
{1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};
D3D12MA::ALLOCATION_DESC allocationDesc = {};
allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
@@ -31,8 +31,13 @@ bool D3D12StreamBuffer::Create(u32 size)
wil::com_ptr_nothrow<ID3D12Resource> buffer;
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
HRESULT hr;
if (GSDevice12::GetInstance()->UseEnhancedBarriers())
hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource3(&allocationDesc, &resource_desc.desc1,
D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
else
hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc.desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
pxAssertMsg(SUCCEEDED(hr), "Allocate buffer");
if (FAILED(hr))
return false;

View File

@@ -19,6 +19,8 @@
#include "common/ScopedGuard.h"
#include "common/SmallString.h"
#include "common/StringUtil.h"
#include "common/FileSystem.h"
#include "common/Path.h"
#include "D3D12MemAlloc.h"
#include "imgui.h"
@@ -158,6 +160,83 @@ u32 GSDevice12::GetAdapterVendorID() const
return desc.VendorId;
}
/// Reads the Agility SDK version from the version resource of the DLL at `path`.
/// @param path Path to the DLL to inspect (converted with FileSystem::GetWin32Path).
/// @return The low 16 bits of dwFileVersionMS (the minor file version), or 0 if the
///         version information could not be read.
uint SDKVersion(const std::string& path)
{
	// The Agility SDK version is embedded as the minor file version.
	// This is only true for the redist files, not the OS files.
	// Alternatively, D3D12Core also exports its D3D12SDKVersion.
	const std::wstring wpath = FileSystem::GetWin32Path(path);
	const DWORD size = GetFileVersionInfoSizeW(wpath.c_str(), nullptr);
	if (size == 0)
		return 0;

	std::vector<char> info_data(size);
	if (!GetFileVersionInfoW(wpath.c_str(), 0, size, info_data.data()))
		return 0;

	uint len = 0;
	VS_FIXEDFILEINFO* file_ver = nullptr;
	if (!VerQueryValueW(info_data.data(), L"\\", reinterpret_cast<void**>(&file_ver), &len))
		return 0;

	// A successful query can still report that no value is available (zero length),
	// in which case the returned pointer must not be dereferenced.
	if (!file_ver || len == 0)
		return 0;

	return file_ver->dwFileVersionMS & 0xFFFF;
}
/// Loads the Agility SDK from the "D3D12" directory next to the program executable
/// and applies it to the global D3D12 state.
///
/// The static flag is only set once ApplyToGlobalState() succeeds, so a call that
/// bails out early (missing DLL, unreadable version, API failure) is retried on the
/// next call, while a successful load is never repeated.
void GSDevice12::LoadAgilitySDK()
{
	static bool agility_loaded = false;
	if (agility_loaded)
		return;

	HRESULT hr;

	// See https://microsoft.github.io/DirectX-Specs/d3d/IndependentDevices.html
	ComPtr<ID3D12SDKConfiguration1> sdk_configuration;
	hr = D3D12GetInterface(CLSID_D3D12SDKConfiguration, IID_PPV_ARGS(sdk_configuration.put()));
	if (FAILED(hr))
	{
		Console.Error("D3D12: Agility SDK configuration is not available");
		return;
	}

	std::string sdk_path = Path::Combine(Path::GetDirectory(FileSystem::GetProgramPath()), "\\D3D12\\");
	std::string core_path = Path::Combine(sdk_path, "D3D12Core.dll");
	// No bundled SDK present: silently fall back to whatever the system provides.
	if (!FileSystem::FileExists(core_path.c_str()))
		return;

	const uint agility_version = SDKVersion(core_path);
	if (agility_version == 0)
		return;

	ComPtr<ID3D12DeviceFactory> device_factory;
	// CreateDeviceFactory seems to use a utf8 string for the path.
	// If the system has a newer SDK, then the system SDK seems to be returned instead.
	hr = sdk_configuration->CreateDeviceFactory(agility_version,
		StringUtil::WideStringToUTF8String(FileSystem::GetWin32Path(sdk_path)).c_str(), IID_PPV_ARGS(device_factory.put()));
	if (FAILED(hr))
	{
		// HRESULT is signed; cast to unsigned so failure codes print as 8-digit hex
		// (e.g. 887A0004) rather than as a negative number.
		Console.ErrorFmt("D3D12: Unable to load provided Agility SDK {:08X}", static_cast<unsigned int>(hr));
		return;
	}

	// Windows 10 (and older drivers on 11) will apply to the global state in ID3D12DeviceFactory::CreateDevice().
	// To get consistent behaviour across all systems, always apply the global state.
	// This also allows us to use the normal D3D12*() methods with the loaded agility SDK.
	hr = device_factory->ApplyToGlobalState();
	if (FAILED(hr))
	{
		Console.ErrorFmt("D3D12: Unable to apply provided Agility SDK {:08X}", static_cast<unsigned int>(hr));
		return;
	}

	agility_loaded = true;
}
bool GSDevice12::CreateDevice(u32& vendor_id)
{
bool enable_debug_layer = GSConfig.UseDebugDevice;
@@ -171,6 +250,9 @@ bool GSDevice12::CreateDevice(u32& vendor_id)
HRESULT hr;
// Load the Agility SDK
LoadAgilitySDK();
// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
if (enable_debug_layer)
{
@@ -191,6 +273,7 @@ bool GSDevice12::CreateDevice(u32& vendor_id)
// Intel Haswell doesn't actually support DX12 even tho the device is created which results in a crash,
// to get around this check if device can be created using feature level 12 (skylake+).
const bool isIntel = (vendor_id == 0x163C || vendor_id == 0x8086 || vendor_id == 0x8087);
// Create the actual device.
hr = D3D12CreateDevice(m_adapter.get(), isIntel ? D3D_FEATURE_LEVEL_12_0 : D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device));
if (FAILED(hr))
@@ -206,6 +289,14 @@ bool GSDevice12::CreateDevice(u32& vendor_id)
Console.Error("D3D12: Failed to get lookup adapter by device LUID");
}
ComPtr<ID3D12DeviceConfiguration> config = m_device.try_query<ID3D12DeviceConfiguration>();
int sdkVersion = 0;
if (config)
{
sdkVersion = config->GetDesc().SDKVersion;
Console.WriteLnFmt("D3D12: Agility version: {}", sdkVersion);
}
if (enable_debug_layer)
{
ComPtr<ID3D12InfoQueue> info_queue = m_device.try_query<ID3D12InfoQueue>();
@@ -218,14 +309,18 @@ bool GSDevice12::CreateDevice(u32& vendor_id)
}
D3D12_INFO_QUEUE_FILTER filter = {};
std::array<D3D12_MESSAGE_ID, 5> id_list{
std::array<D3D12_MESSAGE_ID, 6> id_list{
D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,
D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,
D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH,
D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE,
// The current OS version of D3D12 (616) has a validation bug
// This is fixed with Agility 1.618.4.
// For now, disable this warning until the OS updates.
D3D12_MESSAGE_ID_INCOMPATIBLE_BARRIER_LAYOUT,
};
filter.DenyList.NumIDs = static_cast<UINT>(id_list.size());
filter.DenyList.NumIDs = static_cast<UINT>(sdkVersion < 618 ? id_list.size() : id_list.size() - 1);
filter.DenyList.pIDList = id_list.data();
info_queue->PushStorageFilter(&filter);
}
@@ -318,8 +413,17 @@ bool GSDevice12::CreateCommandLists()
if (FAILED(hr))
return false;
hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(),
nullptr, IID_PPV_ARGS(res.command_lists[i].put()));
if (m_enhanced_barriers)
{
hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(),
nullptr, IID_PPV_ARGS(res.command_lists[i].list7.put()));
res.command_lists[i].list4 = res.command_lists[i].list7;
}
else
{
hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[i].get(),
nullptr, IID_PPV_ARGS(res.command_lists[i].list4.put()));
}
if (FAILED(hr))
{
Console.Error("D3D12: Failed to create command list: %08X", hr);
@@ -327,7 +431,7 @@ bool GSDevice12::CreateCommandLists()
}
// Close the command lists, since the first thing we do is reset them.
hr = res.command_lists[i]->Close();
hr = res.command_lists[i].list4->Close();
pxAssertRel(SUCCEEDED(hr), "Closing new command list failed");
if (FAILED(hr))
return false;
@@ -365,7 +469,7 @@ void GSDevice12::MoveToNextCommandList()
// Begin command list.
res.command_allocators[1]->Reset();
res.command_lists[1]->Reset(res.command_allocators[1].get(), nullptr);
res.command_lists[1].list4->Reset(res.command_allocators[1].get(), nullptr);
res.descriptor_allocator.Reset();
if (res.sampler_allocator.ShouldReset())
res.sampler_allocator.Reset();
@@ -397,18 +501,18 @@ void GSDevice12::MoveToNextCommandList()
res.has_timestamp_query = m_gpu_timing_enabled;
if (m_gpu_timing_enabled)
{
res.command_lists[1]->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
res.command_lists[1].list4->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST);
}
ID3D12DescriptorHeap* heaps[2] = {
res.descriptor_allocator.GetDescriptorHeap(), res.sampler_allocator.GetDescriptorHeap()};
res.command_lists[1]->SetDescriptorHeaps(std::size(heaps), heaps);
res.command_lists[1].list4->SetDescriptorHeaps(std::size(heaps), heaps);
m_allocator->SetCurrentFrameIndex(static_cast<UINT>(m_current_fence_value));
}
ID3D12GraphicsCommandList4* GSDevice12::GetInitCommandList()
const D3D12CommandList& GSDevice12::GetInitCommandList()
{
CommandListResources& res = m_command_lists[m_current_command_list];
if (!res.init_command_list_used)
@@ -416,12 +520,12 @@ ID3D12GraphicsCommandList4* GSDevice12::GetInitCommandList()
[[maybe_unused]] HRESULT hr = res.command_allocators[0]->Reset();
pxAssertMsg(SUCCEEDED(hr), "Reset init command allocator failed");
res.command_lists[0]->Reset(res.command_allocators[0].get(), nullptr);
res.command_lists[0].list4->Reset(res.command_allocators[0].get(), nullptr);
pxAssertMsg(SUCCEEDED(hr), "Reset init command list failed");
res.init_command_list_used = true;
}
return res.command_lists[0].get();
return res.command_lists[0];
}
bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion)
@@ -432,16 +536,16 @@ bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion)
if (res.has_timestamp_query)
{
// write the timestamp back at the end of the cmdlist
res.command_lists[1]->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
res.command_lists[1].list4->EndQuery(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
(m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1);
res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
res.command_lists[1].list4->ResolveQueryData(m_timestamp_query_heap.get(), D3D12_QUERY_TYPE_TIMESTAMP,
m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, NUM_TIMESTAMP_QUERIES_PER_CMDLIST,
m_timestamp_query_buffer.get(), m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
}
if (res.init_command_list_used)
{
hr = res.command_lists[0]->Close();
hr = res.command_lists[0].list4->Close();
if (FAILED(hr))
{
Console.Error("D3D12: Closing init command list failed with HRESULT %08X", hr);
@@ -450,7 +554,7 @@ bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion)
}
// Close and queue command list.
hr = res.command_lists[1]->Close();
hr = res.command_lists[1].list4->Close();
if (FAILED(hr))
{
Console.Error("D3D12: Closing main command list failed with HRESULT %08X", hr);
@@ -459,12 +563,12 @@ bool GSDevice12::ExecuteCommandList(WaitType wait_for_completion)
if (res.init_command_list_used)
{
const std::array<ID3D12CommandList*, 2> execute_lists{res.command_lists[0].get(), res.command_lists[1].get()};
const std::array<ID3D12CommandList*, 2> execute_lists{res.command_lists[0].list4.get(), res.command_lists[1].list4.get()};
m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
}
else
{
const std::array<ID3D12CommandList*, 1> execute_lists{res.command_lists[1].get()};
const std::array<ID3D12CommandList*, 1> execute_lists{res.command_lists[1].list4.get()};
m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
}
@@ -598,10 +702,14 @@ bool GSDevice12::CreateTimestampQuery()
}
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK};
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, BUFFER_SIZE, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put()));
const D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, BUFFER_SIZE, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};
if (m_enhanced_barriers)
hr = m_allocator->CreateResource3(&allocation_desc, &resource_desc.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr,
0, nullptr, m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put()));
else
hr = m_allocator->CreateResource(&allocation_desc, &resource_desc.desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
m_timestamp_query_allocation.put(), IID_PPV_ARGS(m_timestamp_query_buffer.put()));
if (FAILED(hr))
{
Console.Error("D3D12: CreateResource() for timestamp failed with %08X", hr);
@@ -638,15 +746,20 @@ bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_
{
// Try to place the fixed index buffer in GPU local memory.
// Use the staging buffer to copy into it.
const D3D12_RESOURCE_DESC rd = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
const D3D12_RESOURCE_DESCU rd = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};
const D3D12MA::ALLOCATION_DESC cpu_ad = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};
ComPtr<ID3D12Resource> cpu_buffer;
ComPtr<D3D12MA::Allocation> cpu_allocation;
HRESULT hr = m_allocator->CreateResource(
&cpu_ad, &rd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put()));
HRESULT hr;
if (m_enhanced_barriers)
hr = m_allocator->CreateResource3(
&cpu_ad, &rd.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put()));
else
hr = m_allocator->CreateResource(
&cpu_ad, &rd.desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put()));
pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
if (FAILED(hr))
return false;
@@ -662,21 +775,34 @@ bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_
cpu_buffer->Unmap(0, &write_range);
const D3D12MA::ALLOCATION_DESC gpu_ad = {D3D12MA::ALLOCATION_FLAG_COMMITTED, D3D12_HEAP_TYPE_DEFAULT};
hr = m_allocator->CreateResource(
&gpu_ad, &rd, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
if (m_enhanced_barriers)
hr = m_allocator->CreateResource3(
&gpu_ad, &rd.desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
else
hr = m_allocator->CreateResource(
&gpu_ad, &rd.desc, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
if (FAILED(hr))
return false;
GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size);
GetInitCommandList().list4->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size);
D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
rb.Transition.pResource = *gpu_buffer;
rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use.
rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
GetInitCommandList()->ResourceBarrier(1, &rb);
if (m_enhanced_barriers)
{
const D3D12_BUFFER_BARRIER barrier = {D3D12_BARRIER_SYNC_COPY, D3D12_BARRIER_SYNC_INDEX_INPUT,
D3D12_BARRIER_ACCESS_COPY_DEST, D3D12_BARRIER_ACCESS_INDEX_BUFFER, *gpu_buffer, 0, size};
const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_BUFFER, .NumBarriers = 1, .pBufferBarriers = &barrier};
GetInitCommandList().list7->Barrier(1, &group);
}
else
{
D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
rb.Transition.pResource = *gpu_buffer;
rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use.
rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
GetInitCommandList().list4->ResourceBarrier(1, &rb);
}
DeferResourceDestruction(cpu_allocation.get(), cpu_buffer.get());
return true;
@@ -768,7 +894,7 @@ void GSDevice12::Destroy()
{
GSDevice::Destroy();
if (GetCommandList())
if (GetCommandList().list4)
{
EndRenderPass();
ExecuteCommandList(true);
@@ -919,11 +1045,11 @@ bool GSDevice12::CreateSwapChain()
// Render a frame as soon as possible to clear out whatever was previously being displayed.
EndRenderPass();
GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get();
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
const D3D12CommandList& cmdlist = GetCommandList();
m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
swap_chain_buf->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
cmdlist->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr);
swap_chain_buf->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PRESENT);
swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::RenderTarget);
cmdlist.list4->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr);
swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::Present);
ExecuteCommandList(false);
m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
return true;
@@ -950,7 +1076,7 @@ bool GSDevice12::CreateSwapChainRTV()
std::unique_ptr<GSTexture12> tex = GSTexture12::Adopt(std::move(backbuffer), GSTexture::Type::RenderTarget,
GSTexture::Format::Color, swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, 1,
swap_chain_desc.BufferDesc.Format, DXGI_FORMAT_UNKNOWN, swap_chain_desc.BufferDesc.Format,
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_STATE_COMMON);
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, GSTexture12::ResourceState::Present);
if (!tex)
{
m_swap_chain_buffers.clear();
@@ -1122,18 +1248,18 @@ GSDevice::PresentResult GSDevice12::BeginPresent(bool frame_skip)
GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get();
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
swap_chain_buf->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
cmdlist->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr);
cmdlist->OMSetRenderTargets(1, &swap_chain_buf->GetWriteDescriptor().cpu_handle, FALSE, nullptr);
const D3D12CommandList& cmdlist = GetCommandList();
swap_chain_buf->TransitionToState(cmdlist, GSTexture12::ResourceState::RenderTarget);
cmdlist.list4->ClearRenderTargetView(swap_chain_buf->GetWriteDescriptor(), s_present_clear_color.data(), 0, nullptr);
cmdlist.list4->OMSetRenderTargets(1, &swap_chain_buf->GetWriteDescriptor().cpu_handle, FALSE, nullptr);
g_perfmon.Put(GSPerfMon::RenderPasses, 1);
const D3D12_VIEWPORT vp{0.0f, 0.0f, static_cast<float>(m_window_info.surface_width),
static_cast<float>(m_window_info.surface_height), 0.0f, 1.0f};
const D3D12_RECT scissor{
0, 0, static_cast<LONG>(m_window_info.surface_width), static_cast<LONG>(m_window_info.surface_height)};
cmdlist->RSSetViewports(1, &vp);
cmdlist->RSSetScissorRects(1, &scissor);
cmdlist.list4->RSSetViewports(1, &vp);
cmdlist.list4->RSSetScissorRects(1, &scissor);
return PresentResult::OK;
}
@@ -1144,7 +1270,7 @@ void GSDevice12::EndPresent()
GSTexture12* swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer].get();
m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
swap_chain_buf->TransitionToState(GetCommandList(), D3D12_RESOURCE_STATE_PRESENT);
swap_chain_buf->TransitionToState(GSTexture12::ResourceState::Present);
if (!ExecuteCommandList(WaitType::None))
{
m_device_lost = true;
@@ -1187,7 +1313,7 @@ void GSDevice12::PushDebugGroup(const char* fmt, ...)
const UINT color = Palette(
++s_debug_scope_depth, {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});
PIXBeginEvent(GetCommandList(), color, "%s", buf.c_str());
PIXBeginEvent(GetCommandList().list4.get(), color, "%s", buf.c_str());
#endif
}
@@ -1199,7 +1325,7 @@ void GSDevice12::PopDebugGroup()
s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u);
PIXEndEvent(GetCommandList());
PIXEndEvent(GetCommandList().list4.get());
#endif
}
@@ -1230,7 +1356,7 @@ void GSDevice12::InsertDebugMessage(DebugMessageCategory category, const char* f
static_cast<BYTE>(fcolor[1] * 255.0f),
static_cast<BYTE>(fcolor[2] * 255.0f));
PIXSetMarker(GetCommandList(), color, "%s", buf.c_str());
PIXSetMarker(GetCommandList().list4.get(), color, "%s", buf.c_str());
#endif
}
@@ -1263,30 +1389,43 @@ bool GSDevice12::CheckFeatures(const u32& vendor_id)
Console.WriteLnFmt("D3D12: Programmable Sample Position: {}", m_programmable_sample_positions ? "Supported" : "Not Supported");
BOOL allow_tearing_supported = false;
const HRESULT hr = m_dxgi_factory->CheckFeatureSupport(
HRESULT hr = m_dxgi_factory->CheckFeatureSupport(
DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported));
m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
D3D12_FEATURE_DATA_D3D12_OPTIONS12 device_options12 = {};
hr = m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &device_options12, sizeof(device_options12));
if (SUCCEEDED(hr))
{
Console.WriteLnFmt("D3D12: Enhanced Barriers: {}", device_options12.EnhancedBarriersSupported ? "Supported" : "Not Supported");
m_enhanced_barriers = device_options12.EnhancedBarriersSupported;
}
else
{
Console.WriteLnFmt("D3D12: Failed to check for Enhanced Barriers: 0x{:08x}", static_cast<unsigned long>(hr));
m_enhanced_barriers = false;
}
return true;
}
void GSDevice12::DrawPrimitive()
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
GetCommandList()->DrawInstanced(m_vertex.count, 1, m_vertex.start, 0);
GetCommandList().list4->DrawInstanced(m_vertex.count, 1, m_vertex.start, 0);
}
void GSDevice12::DrawIndexedPrimitive()
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
GetCommandList()->DrawIndexedInstanced(m_index.count, 1, m_index.start, m_vertex.start, 0);
GetCommandList().list4->DrawIndexedInstanced(m_index.count, 1, m_index.start, m_vertex.start, 0);
}
void GSDevice12::DrawIndexedPrimitive(int offset, int count)
{
pxAssert(offset + count <= (int)m_index.count);
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
GetCommandList()->DrawIndexedInstanced(count, 1, m_index.start + offset, m_vertex.start, 0);
GetCommandList().list4->DrawIndexedInstanced(count, 1, m_index.start + offset, m_vertex.start, 0);
}
void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_format, DXGI_FORMAT* srv_format,
@@ -1391,14 +1530,14 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r,
if (dTex12->GetType() != GSTexture::Type::DepthStencil)
{
dTex12->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
GetCommandList()->ClearRenderTargetView(
dTex12->TransitionToState(GSTexture12::ResourceState::RenderTarget);
GetCommandList().list4->ClearRenderTargetView(
dTex12->GetWriteDescriptor(), sTex12->GetUNormClearColor().v, 0, nullptr);
}
else
{
dTex12->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
GetCommandList()->ClearDepthStencilView(
dTex12->TransitionToState(GSTexture12::ResourceState::DepthWriteStencil);
GetCommandList().list4->ClearDepthStencilView(
dTex12->GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, sTex12->GetClearDepth(), 0, 0, nullptr);
}
@@ -1418,12 +1557,12 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r,
EndRenderPass();
sTex12->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE);
sTex12->TransitionToState(GSTexture12::ResourceState::CopySrc);
sTex12->SetUseFenceCounter(GetCurrentFenceValue());
if (m_tfx_textures[0] && sTex12->GetSRVDescriptor() == m_tfx_textures[0])
PSSetShaderResource(0, nullptr, false);
dTex12->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST);
dTex12->TransitionToState(GSTexture12::ResourceState::CopyDst);
dTex12->SetUseFenceCounter(GetCurrentFenceValue());
D3D12_TEXTURE_COPY_LOCATION srcloc;
@@ -1440,13 +1579,13 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r,
const bool full_rt_copy = src_dst_rect_match && ((sTex12->IsDepthStencil() && !m_programmable_sample_positions) || (destX == 0 && destY == 0 && r.eq(src_rect)));
if (full_rt_copy)
{
GetCommandList()->CopyResource(dTex12->GetResource(), sTex12->GetResource());
GetCommandList().list4->CopyResource(dTex12->GetResource(), sTex12->GetResource());
}
else
{
const D3D12_BOX srcbox{static_cast<UINT>(r.left), static_cast<UINT>(r.top), 0u, static_cast<UINT>(r.right),
static_cast<UINT>(r.bottom), 1u};
GetCommandList()->CopyTextureRegion(&dstloc, destX, destY, 0, &srcloc, &srcbox);
GetCommandList().list4->CopyTextureRegion(&dstloc, destX, destY, 0, &srcloc, &srcbox);
}
dTex12->SetState(GSTexture::State::Dirty);
@@ -1560,10 +1699,10 @@ void GSDevice12::DrawMultiStretchRects(
{
GSTexture12* const stex = static_cast<GSTexture12*>(rects[i].src);
stex->CommitClear();
if (stex->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
if (stex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
{
EndRenderPass();
stex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
stex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
}
@@ -1689,11 +1828,11 @@ void GSDevice12::BeginRenderPassForStretchRect(
void GSDevice12::DoStretchRect(GSTexture12* sTex, const GSVector4& sRect, GSTexture12* dTex, const GSVector4& dRect,
const ID3D12PipelineState* pipeline, bool linear, bool allow_discard)
{
if (sTex->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
if (sTex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
{
// can't transition in a render pass
EndRenderPass();
sTex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
sTex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
SetUtilityRootSignature();
@@ -1770,14 +1909,14 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex,
if (has_input_0)
{
static_cast<GSTexture12*>(sTex[0])->CommitClear();
static_cast<GSTexture12*>(sTex[0])->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(sTex[0])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
if (has_input_1)
{
static_cast<GSTexture12*>(sTex[1])->CommitClear();
static_cast<GSTexture12*>(sTex[1])->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(sTex[1])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
static_cast<GSTexture12*>(dTex)->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::RenderTarget);
// Upload constant to select YUV algo, but skip constant buffer update if we don't need it
if (feedback_write_2 || feedback_write_1 || sTex[0])
@@ -1830,7 +1969,7 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex,
if (sTex[0] == sTex[2])
{
// need a barrier here because of the render pass
static_cast<GSTexture12*>(sTex[2])->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(sTex[2])->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
}
@@ -1877,13 +2016,13 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex,
// this texture is going to get used as an input, so make sure we don't read undefined data
static_cast<GSTexture12*>(dTex)->CommitClear();
static_cast<GSTexture12*>(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
void GSDevice12::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb)
{
static_cast<GSTexture12*>(dTex)->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::RenderTarget);
const GSVector4i rc = GSVector4i(dRect);
const GSVector4i dtex_rc = dTex->GetRect();
@@ -1899,7 +2038,7 @@ void GSDevice12::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture*
EndRenderPass();
// this texture is going to get used as an input, so make sure we don't read undefined data
static_cast<GSTexture12*>(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
void GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4])
@@ -1918,7 +2057,7 @@ void GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float para
DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize());
EndRenderPass();
static_cast<GSTexture12*>(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
void GSDevice12::DoFXAA(GSTexture* sTex, GSTexture* dTex)
@@ -1936,7 +2075,7 @@ void GSDevice12::DoFXAA(GSTexture* sTex, GSTexture* dTex)
DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize());
EndRenderPass();
static_cast<GSTexture12*>(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
static_cast<GSTexture12*>(dTex)->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
bool GSDevice12::CompileCASPipelines()
@@ -2100,7 +2239,7 @@ void GSDevice12::RenderImGui()
D3D12DescriptorHandle handle = m_null_texture->GetSRVDescriptor();
if (tex)
{
tex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
tex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
handle = tex->GetSRVDescriptor();
}
@@ -2118,7 +2257,7 @@ void GSDevice12::RenderImGui()
if (ApplyUtilityState())
{
GetCommandList()->DrawIndexedInstanced(
GetCommandList().list4->DrawIndexedInstanced(
pcmd->ElemCount, 1, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
}
}
@@ -2147,23 +2286,23 @@ bool GSDevice12::DoCAS(
}
}
ID3D12GraphicsCommandList* const cmdlist = GetCommandList();
const D3D12_RESOURCE_STATES old_state = sTex12->GetResourceState();
sTex12->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
dTex12->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
const D3D12CommandList& cmdlist = GetCommandList();
const GSTexture12::ResourceState old_state = sTex12->GetResourceState();
sTex12->TransitionToState(cmdlist, GSTexture12::ResourceState::ComputeShaderResource);
dTex12->TransitionToState(cmdlist, GSTexture12::ResourceState::CASShaderUAV);
cmdlist->SetComputeRootSignature(m_cas_root_signature.get());
cmdlist->SetComputeRoot32BitConstants(
cmdlist.list4->SetComputeRootSignature(m_cas_root_signature.get());
cmdlist.list4->SetComputeRoot32BitConstants(
CAS_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, NUM_CAS_CONSTANTS, constants.data(), 0);
cmdlist->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE, sTexDH);
cmdlist->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE, dTexDH);
cmdlist->SetPipelineState(sharpen_only ? m_cas_sharpen_pipeline.get() : m_cas_upscale_pipeline.get());
cmdlist.list4->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE, sTexDH);
cmdlist.list4->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE, dTexDH);
cmdlist.list4->SetPipelineState(sharpen_only ? m_cas_sharpen_pipeline.get() : m_cas_upscale_pipeline.get());
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
static const int threadGroupWorkRegionDim = 16;
const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
cmdlist->Dispatch(dispatchX, dispatchY, 1);
cmdlist.list4->Dispatch(dispatchX, dispatchY, 1);
sTex12->TransitionToState(cmdlist, old_state);
return true;
@@ -2243,9 +2382,9 @@ void GSDevice12::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
if (!InRenderPass())
{
if (vkRt)
vkRt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
vkRt->TransitionToState(GSTexture12::ResourceState::RenderTarget);
if (vkDs)
vkDs->TransitionToState(depth_read ? (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) : D3D12_RESOURCE_STATE_DEPTH_WRITE);
vkDs->TransitionToState(depth_read ? GSTexture12::ResourceState::DepthReadStencil : GSTexture12::ResourceState::DepthWriteStencil);
}
// This is used to set/initialize the framebuffer for tfx rendering.
@@ -2374,7 +2513,7 @@ bool GSDevice12::CreateNullTexture()
if (!m_null_texture)
return false;
m_null_texture->TransitionToState(GetCommandList(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
m_null_texture->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
D3D12::SetObjectName(m_null_texture->GetResource(), "Null texture");
return true;
}
@@ -3153,7 +3292,7 @@ void GSDevice12::ExecuteCommandListAndRestartRenderPass(bool wait_for_completion
if (was_in_render_pass)
{
// rebind everything except RT, because the RP does that for us
ApplyBaseState(m_dirty_flags & ~DIRTY_FLAG_RENDER_TARGET, GetCommandList());
ApplyBaseState(m_dirty_flags & ~DIRTY_FLAG_RENDER_TARGET, GetCommandList().list4.get());
m_dirty_flags &= ~DIRTY_BASE_STATE;
// restart render pass
@@ -3244,17 +3383,17 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state, boo
GSTexture12* dtex = static_cast<GSTexture12*>(sr);
if (check_state)
{
if (dtex->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE && InRenderPass())
if (dtex->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource && InRenderPass())
{
GL_INS("Ending render pass due to resource transition");
EndRenderPass();
}
dtex->CommitClear();
dtex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
dtex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
dtex->SetUseFenceCounter(GetCurrentFenceValue());
handle = feedback ? dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor();
handle = (feedback && !m_enhanced_barriers) ? dtex->GetFBLDescriptor() : dtex->GetSRVDescriptor();
}
else
{
@@ -3285,7 +3424,7 @@ void GSDevice12::SetUtilityRootSignature()
m_current_root_signature = RootSignature::Utility;
m_dirty_flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE | DIRTY_FLAG_SAMPLERS_DESCRIPTOR_TABLE | DIRTY_FLAG_PIPELINE;
GetCommandList()->SetGraphicsRootSignature(m_utility_root_signature.get());
GetCommandList().list4->SetGraphicsRootSignature(m_utility_root_signature.get());
}
void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle& sampler)
@@ -3295,7 +3434,7 @@ void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle&
{
GSTexture12* d12tex = static_cast<GSTexture12*>(dtex);
d12tex->CommitClear();
d12tex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
d12tex->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
d12tex->SetUseFenceCounter(GetCurrentFenceValue());
handle = d12tex->GetSRVDescriptor();
}
@@ -3333,7 +3472,7 @@ void GSDevice12::SetUtilityTexture(GSTexture* dtex, const D3D12DescriptorHandle&
void GSDevice12::SetUtilityPushConstants(const void* data, u32 size)
{
GetCommandList()->SetGraphicsRoot32BitConstants(
GetCommandList().list4->SetGraphicsRoot32BitConstants(
UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, (size + 3) / sizeof(u32), data, 0);
}
@@ -3396,13 +3535,13 @@ void GSDevice12::RenderTextureMipmap(
}
// *now* we don't have to worry about running out of anything.
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
const D3D12CommandList& cmdlist = GetCommandList();
if (texture->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
texture->TransitionSubresourceToState(
cmdlist, src_level, texture->GetResourceState(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET)
cmdlist, src_level, texture->GetResourceState(), GSTexture12::ResourceState::PixelShaderResource);
if (texture->GetResourceState() != GSTexture12::ResourceState::RenderTarget)
texture->TransitionSubresourceToState(
cmdlist, dst_level, texture->GetResourceState(), D3D12_RESOURCE_STATE_RENDER_TARGET);
cmdlist, dst_level, texture->GetResourceState(), GSTexture12::ResourceState::RenderTarget);
// We set the state directly here.
constexpr u32 MODIFIED_STATE = DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | DIRTY_FLAG_RENDER_TARGET;
@@ -3410,14 +3549,14 @@ void GSDevice12::RenderTextureMipmap(
// Using a render pass is probably a bit overkill.
const D3D12_DISCARD_REGION discard_region = {0u, nullptr, dst_level, 1u};
cmdlist->DiscardResource(texture->GetResource(), &discard_region);
cmdlist->OMSetRenderTargets(1, &rtv_handle.cpu_handle, FALSE, nullptr);
cmdlist.list4->DiscardResource(texture->GetResource(), &discard_region);
cmdlist.list4->OMSetRenderTargets(1, &rtv_handle.cpu_handle, FALSE, nullptr);
const D3D12_VIEWPORT vp = {0.0f, 0.0f, static_cast<float>(dst_width), static_cast<float>(dst_height), 0.0f, 1.0f};
cmdlist->RSSetViewports(1, &vp);
cmdlist.list4->RSSetViewports(1, &vp);
const D3D12_RECT scissor = {0, 0, static_cast<LONG>(dst_width), static_cast<LONG>(dst_height)};
cmdlist->RSSetScissorRects(1, &scissor);
cmdlist.list4->RSSetScissorRects(1, &scissor);
SetUtilityRootSignature();
SetPipeline(m_convert[static_cast<int>(ShaderConvert::COPY)].get());
@@ -3425,12 +3564,12 @@ void GSDevice12::RenderTextureMipmap(
GSVector4(0.0f, 0.0f, static_cast<float>(dst_width), static_cast<float>(dst_height)),
GSVector2i(dst_width, dst_height));
if (texture->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
if (texture->GetResourceState() != GSTexture12::ResourceState::PixelShaderResource)
texture->TransitionSubresourceToState(
cmdlist, src_level, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, texture->GetResourceState());
if (texture->GetResourceState() != D3D12_RESOURCE_STATE_RENDER_TARGET)
cmdlist, src_level, GSTexture12::ResourceState::PixelShaderResource, texture->GetResourceState());
if (texture->GetResourceState() != GSTexture12::ResourceState::RenderTarget)
texture->TransitionSubresourceToState(
cmdlist, dst_level, D3D12_RESOURCE_STATE_RENDER_TARGET, texture->GetResourceState());
cmdlist, dst_level, GSTexture12::ResourceState::RenderTarget, texture->GetResourceState());
// Must destroy after current cmdlist.
DeferDescriptorDestruction(m_descriptor_heap_manager, &srv_handle);
@@ -3496,7 +3635,7 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b
}
}
GetCommandList()->BeginRenderPass(m_current_render_target ? 1 : 0,
GetCommandList().list4->BeginRenderPass(m_current_render_target ? 1 : 0,
m_current_render_target ? &rt : nullptr, m_current_depth_target ? &ds : nullptr,
(m_current_depth_target && m_current_depth_read_only) ? (D3D12_RENDER_PASS_FLAG_BIND_READ_ONLY_DEPTH) : D3D12_RENDER_PASS_FLAG_NONE);
}
@@ -3513,7 +3652,7 @@ void GSDevice12::EndRenderPass()
g_perfmon.Put(GSPerfMon::RenderPasses, 1);
GetCommandList()->EndRenderPass();
GetCommandList().list4->EndRenderPass();
}
void GSDevice12::SetViewport(const D3D12_VIEWPORT& viewport)
@@ -3675,7 +3814,7 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
flags |= DIRTY_FLAG_TEXTURES_DESCRIPTOR_TABLE_2;
}
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
ID3D12GraphicsCommandList* cmdlist = GetCommandList().list4.get();
if (m_current_root_signature != RootSignature::TFX)
{
@@ -3714,7 +3853,7 @@ bool GSDevice12::ApplyUtilityState(bool already_execed)
u32 flags = m_dirty_flags;
m_dirty_flags &= ~DIRTY_UTILITY_STATE;
ID3D12GraphicsCommandList* cmdlist = GetCommandList();
ID3D12GraphicsCommandList* cmdlist = GetCommandList().list4.get();
if (m_current_root_signature != RootSignature::Utility)
{
@@ -3846,11 +3985,37 @@ GSTexture12* GSDevice12::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, Pipe
config.alpha_second_pass.ps.date = 3;
// and bind the image to the primitive sampler
image->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
image->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
PSSetShaderResource(3, image, false);
return image;
}
// Records a barrier on the current command list for `texture`, used by the HW draw path when
// the render target is also read as a texture (feedback).
// - Enhanced barriers path: records a single texture barrier through list7 and does not end
//   the current render pass.
// - Legacy path: ends the current render pass, then records an aliasing barrier through list4.
void GSDevice12::FeedbackBarrier(const GSTexture12* texture)
{
if (m_enhanced_barriers)
{
// Enhanced barriers allow for single-resource feedback.
// The same sync/access masks are used on both sides of the barrier: render target writes
// and pixel shader reads before it, and the same set after it.
const D3D12_BARRIER_SYNC sync = D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING;
const D3D12_BARRIER_ACCESS access = D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
// The layout stays COMMON -> COMMON and the barrier covers all subresources.
// NOTE(review): this presumably relies on the texture having been created as a
// simultaneous-access texture (kept in the COMMON layout) - confirm against GSTexture12.
const D3D12_TEXTURE_BARRIER barrier = {sync, sync, access, access, D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_LAYOUT_COMMON,
texture->GetResource(), {D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, 0, 0, 0, 0, 0}, D3D12_TEXTURE_BARRIER_FLAG_NONE};
const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_TEXTURE, .NumBarriers = 1, .pTextureBarriers = &barrier};
GetCommandList().list7->Barrier(1, &group);
}
else
{
// The DX12 spec notes "You may not read from, or consume, a write that occurred within the same render pass".
// The only exception being the implicit reads for render target blending or depth testing.
// Thus, in addition to a barrier, we need to end the render pass.
EndRenderPass();
// Specify null for the after resource as both resources are used after the barrier.
// While this may also be true before the barrier, we only write using the main resource.
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
barrier.Aliasing = {texture->GetResource(), nullptr};
GetCommandList().list4->ResourceBarrier(1, &barrier);
}
}
void GSDevice12::RenderHW(GSHWDrawConfig& config)
{
@@ -3871,12 +4036,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
if (m_features.texture_barrier && config.tex && (config.tex == config.rt) && !(config.require_one_barrier || config.require_full_barrier))
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
EndRenderPass();
// Specify null for the after resource as both resources are used after the barrier.
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
barrier.Aliasing = {draw_rt->GetResource(), nullptr};
GetCommandList()->ResourceBarrier(1, &barrier);
FeedbackBarrier(draw_rt);
}
// now blit the colclip texture back to the original target
@@ -3887,7 +4047,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
GL_PUSH("Blit ColorClip back to RT");
EndRenderPass();
colclip_rt->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
colclip_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
draw_rt = static_cast<GSTexture12*>(config.rt);
OMSetRenderTargets(draw_rt, draw_ds, config.colclip_update_area);
@@ -4001,7 +4161,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
else if (draw_rt->GetState() == GSTexture::State::Dirty)
{
GL_PUSH_("ColorClip Render Target Setup");
draw_rt->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
draw_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
}
// we're not drawing to the RT, so we can use it as a source
@@ -4064,10 +4224,10 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
{
EndRenderPass();
// Make sure the DSV is in writeable state
draw_ds->TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE);
draw_ds->TransitionToState(GSTexture12::ResourceState::DepthWriteStencil);
D3D12_RECT rect = {config.drawarea.left, config.drawarea.top, config.drawarea.left + config.drawarea.width(), config.drawarea.top + config.drawarea.height()};
GetCommandList()->ClearDepthStencilView(draw_ds->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 0.0f, 1, 1, &rect);
GetCommandList().list4->ClearDepthStencilView(draw_ds->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 0.0f, 1, 1, &rect);
}
// Begin render pass if new target or out of the area.
@@ -4167,7 +4327,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
GL_PUSH("Blit ColorClip back to RT");
EndRenderPass();
colclip_rt->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
colclip_rt->TransitionToState(GSTexture12::ResourceState::PixelShaderResource);
draw_rt = static_cast<GSTexture12*>(config.rt);
OMSetRenderTargets(draw_rt, draw_ds, config.colclip_update_area);
@@ -4223,12 +4383,7 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig&
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
EndRenderPass();
// Specify null for the after resource as both resources are used after the barrier.
// While this may also be true before the barrier, we only write using the main resource.
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
barrier.Aliasing = {draw_rt->GetResource(), nullptr};
GetCommandList()->ResourceBarrier(1, &barrier);
FeedbackBarrier(draw_rt);
if (BindDrawPipeline(pipe))
DrawIndexedPrimitive(p, count);
@@ -4242,11 +4397,7 @@ void GSDevice12::SendHWDraw(const PipelineSelector& pipe, const GSHWDrawConfig&
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
EndRenderPass();
// Specify null for the after resource as both resources are used after the barrier.
D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_ALIASING, D3D12_RESOURCE_BARRIER_FLAG_NONE};
barrier.Aliasing = {draw_rt->GetResource(), nullptr};
GetCommandList()->ResourceBarrier(1, &barrier);
FeedbackBarrier(draw_rt);
}
}

View File

@@ -21,6 +21,14 @@ namespace D3D12MA
class Allocator;
}
/// Bundles the command list interface pointers handed out by GSDevice12::GetCommandList()
/// and GSDevice12::GetInitCommandList().
/// Callers record ordinary commands (draws, copies, clears, legacy ResourceBarrier) through
/// `list4`, and call `list7->Barrier()` only on the enhanced-barriers path.
/// NOTE(review): presumably both members refer to the same underlying command list through
/// different interface versions, with `list7` only populated when enhanced barriers are
/// available - confirm against the command list creation code.
struct D3D12CommandList
{
// Main command list
wil::com_ptr_nothrow<ID3D12GraphicsCommandList4> list4;
// Enhanced barriers command list
wil::com_ptr_nothrow<ID3D12GraphicsCommandList7> list7;
};
class GSDevice12 final : public GSDevice
{
public:
@@ -42,6 +50,12 @@ public:
NUM_TIMESTAMP_QUERIES_PER_CMDLIST = 2,
};
/// Resource description that can be viewed as either descriptor version.
/// Buffer allocation passes `desc1` to the allocator's CreateResource3() on the
/// enhanced-barriers path and `desc` to CreateResource() otherwise.
/// NOTE(review): reading `desc` after filling `desc1` (or vice versa) assumes the two
/// structs share a common leading field layout - confirm against the D3D12 headers.
union D3D12_RESOURCE_DESCU
{
D3D12_RESOURCE_DESC1 desc1;
D3D12_RESOURCE_DESC desc;
};
__fi IDXGIAdapter1* GetAdapter() const { return m_adapter.get(); }
__fi ID3D12Device* GetDevice() const { return m_device.get(); }
__fi ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.get(); }
@@ -50,14 +64,16 @@ public:
/// Returns the PCI vendor ID of the device, if known.
u32 GetAdapterVendorID() const;
bool UseEnhancedBarriers() const { return m_enhanced_barriers; }
/// Returns the current command list, commands can be recorded directly.
ID3D12GraphicsCommandList4* GetCommandList() const
const D3D12CommandList& GetCommandList() const
{
return m_command_lists[m_current_command_list].command_lists[1].get();
return m_command_lists[m_current_command_list].command_lists[1];
}
/// Returns the init command list for uploading.
ID3D12GraphicsCommandList4* GetInitCommandList();
const D3D12CommandList& GetInitCommandList();
/// Returns the per-frame SRV/CBV/UAV allocator.
D3D12DescriptorAllocator& GetDescriptorAllocator()
@@ -137,7 +153,7 @@ private:
struct CommandListResources
{
std::array<ComPtr<ID3D12CommandAllocator>, 2> command_allocators;
std::array<ComPtr<ID3D12GraphicsCommandList4>, 2> command_lists;
std::array<D3D12CommandList, 2> command_lists;
D3D12DescriptorAllocator descriptor_allocator;
D3D12GroupedSamplerAllocator<SAMPLER_GROUP_SIZE> sampler_allocator;
std::vector<std::pair<D3D12MA::Allocation*, ID3D12DeviceChild*>> pending_resources;
@@ -147,6 +163,8 @@ private:
bool has_timestamp_query = false;
};
void LoadAgilitySDK();
bool CreateDevice(u32& vendor_id);
bool CreateDescriptorHeaps();
bool CreateCommandLists();
@@ -296,6 +314,7 @@ private:
bool m_allow_tearing_supported = false;
bool m_using_allow_tearing = false;
bool m_is_exclusive_fullscreen = false;
bool m_enhanced_barriers = true;
bool m_device_lost = false;
ComPtr<ID3D12RootSignature> m_tfx_root_signature;
@@ -377,6 +396,8 @@ private:
ComPtr<ID3DBlob> GetUtilityVertexShader(const std::string& source, const char* entry_point);
ComPtr<ID3DBlob> GetUtilityPixelShader(const std::string& source, const char* entry_point);
void FeedbackBarrier(const GSTexture12* texture);
bool CheckFeatures(const u32& vendor_id);
bool CreateNullTexture();
bool CreateBuffers();

View File

@@ -19,7 +19,7 @@ GSTexture12::GSTexture12(Type type, Format format, int width, int height, int le
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation, const D3D12DescriptorHandle& srv_descriptor,
const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& ro_dsv_descriptor,
const D3D12DescriptorHandle& uav_descriptor, const D3D12DescriptorHandle& fbl_descriptor,
WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state)
WriteDescriptorType wdtype, bool simultaneous_texture, ResourceState resource_state)
: m_resource(std::move(resource))
, m_resource_fbl(std::move(resource_fbl))
, m_allocation(std::move(allocation))
@@ -31,6 +31,7 @@ GSTexture12::GSTexture12(Type type, Format format, int width, int height, int le
, m_write_descriptor_type(wdtype)
, m_dxgi_format(dxgi_format)
, m_resource_state(resource_state)
, m_simultaneous_tex(simultaneous_texture)
{
m_type = type;
m_format = format;
@@ -111,28 +112,87 @@ void GSTexture12::Destroy(bool defer)
m_write_descriptor_type = WriteDescriptorType::None;
}
// Maps a tracked ResourceState to the enhanced-barriers layout a texture is created in.
// For use with non-simultaneous textures only.
// Simultaneous textures are always D3D12_BARRIER_LAYOUT_COMMON.
//
// The returned layout has to be the same one TransitionSubresourceToState() names as
// LayoutBefore/LayoutAfter for that state, otherwise the first barrier recorded on a
// freshly created texture would describe a layout the texture is not actually in.
// Undefined is the exception and maps to D3D12_BARRIER_LAYOUT_UNDEFINED.
// DepthReadStencil has no single layout (depth and stencil planes differ), so it is not
// accepted here and falls through to the assert.
static D3D12_BARRIER_LAYOUT GetD3D12BarrierLayout(GSTexture12::ResourceState state)
{
	switch (state)
	{
		case GSTexture12::ResourceState::Undefined:
			return D3D12_BARRIER_LAYOUT_UNDEFINED;
		case GSTexture12::ResourceState::Present:
			return D3D12_BARRIER_LAYOUT_COMMON;
		case GSTexture12::ResourceState::RenderTarget:
			return D3D12_BARRIER_LAYOUT_RENDER_TARGET;
		case GSTexture12::ResourceState::DepthWriteStencil:
			return D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
		case GSTexture12::ResourceState::PixelShaderResource:
		case GSTexture12::ResourceState::ComputeShaderResource:
			// Use the direct-queue variant, like the copy and UAV states below, so this
			// agrees with the layout used by the barrier code for these states.
			return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE;
		case GSTexture12::ResourceState::CopySrc:
			return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE;
		case GSTexture12::ResourceState::CopyDst:
			return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST;
		case GSTexture12::ResourceState::CASShaderUAV:
		case GSTexture12::ResourceState::PixelShaderUAV:
			return D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS;
		default:
			// Unknown or unsupported state: flag it in debug builds, fall back to undefined.
			pxAssert(false);
			return D3D12_BARRIER_LAYOUT_UNDEFINED;
	}
}
// Translates a tracked ResourceState into the legacy D3D12_RESOURCE_STATES value.
// Undefined and Present both map to COMMON, and both UAV states map to UNORDERED_ACCESS.
// Any state without a mapping here asserts and yields COMMON.
static D3D12_RESOURCE_STATES GetD3D12ResourceState(GSTexture12::ResourceState state)
{
	using RS = GSTexture12::ResourceState;

	// COMMON doubles as the result for Undefined/Present and as the fallback value.
	D3D12_RESOURCE_STATES legacy_state = D3D12_RESOURCE_STATE_COMMON;
	switch (state)
	{
		case RS::Undefined:
		case RS::Present:
			break;

		case RS::RenderTarget:
			legacy_state = D3D12_RESOURCE_STATE_RENDER_TARGET;
			break;

		case RS::DepthWriteStencil:
			legacy_state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
			break;

		case RS::PixelShaderResource:
			legacy_state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
			break;

		case RS::ComputeShaderResource:
			legacy_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
			break;

		case RS::CopySrc:
			legacy_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
			break;

		case RS::CopyDst:
			legacy_state = D3D12_RESOURCE_STATE_COPY_DEST;
			break;

		case RS::CASShaderUAV:
		case RS::PixelShaderUAV:
			legacy_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
			break;

		default:
			pxAssert(false);
			break;
	}
	return legacy_state;
}
std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int width, int height, int levels,
DXGI_FORMAT dxgi_format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, DXGI_FORMAT dsv_format,
DXGI_FORMAT uav_format)
{
GSDevice12* const dev = GSDevice12::GetInstance();
D3D12_RESOURCE_DESC desc = {};
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
desc.Width = width;
desc.Height = height;
desc.DepthOrArraySize = 1;
desc.MipLevels = levels;
desc.Format = dxgi_format;
desc.SampleDesc.Count = 1;
desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
GSDevice12::D3D12_RESOURCE_DESCU desc = {};
desc.desc1.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
desc.desc1.Width = width;
desc.desc1.Height = height;
desc.desc1.DepthOrArraySize = 1;
desc.desc1.MipLevels = levels;
desc.desc1.Format = dxgi_format;
desc.desc1.SampleDesc.Count = 1;
desc.desc1.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
D3D12MA::ALLOCATION_DESC allocationDesc = {};
allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_WITHIN_BUDGET;
allocationDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
D3D12_CLEAR_VALUE optimized_clear_value = {};
D3D12_RESOURCE_STATES state;
ResourceState state;
switch (type)
{
@@ -140,9 +200,9 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
{
// This is a little annoying. basically, to do mipmap generation, we need to be a render target.
// If it's a compressed texture, we won't be generating mips anyway, so this should be fine.
desc.Flags = (levels > 1 && !IsCompressedFormat(format)) ? D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET :
D3D12_RESOURCE_FLAG_NONE;
state = D3D12_RESOURCE_STATE_COPY_DEST;
desc.desc1.Flags = (levels > 1 && !IsCompressedFormat(format)) ? D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET :
D3D12_RESOURCE_FLAG_NONE;
state = ResourceState::CopyDst;
}
break;
@@ -152,10 +212,11 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
pxAssert(levels == 1);
allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED;
allocationDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES;
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
desc.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE;
desc.desc1.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
if (!dev->UseEnhancedBarriers())
desc.desc1.Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE;
optimized_clear_value.Format = rtv_format;
state = D3D12_RESOURCE_STATE_RENDER_TARGET;
state = ResourceState::RenderTarget;
}
break;
@@ -163,9 +224,9 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
{
pxAssert(levels == 1);
allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED;
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
desc.desc1.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
optimized_clear_value.Format = dsv_format;
state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
state = ResourceState::DepthWriteStencil;
}
break;
@@ -173,7 +234,7 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
{
pxAssert(levels == 1);
allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED;
state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
state = ResourceState::PixelShaderResource;
}
break;
@@ -182,15 +243,16 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
}
if (uav_format != DXGI_FORMAT_UNKNOWN)
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
desc.desc1.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
wil::com_ptr_nothrow<ID3D12Resource> resource;
wil::com_ptr_nothrow<ID3D12Resource> resource_fbl;
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
if (type == Type::RenderTarget)
if (type == Type::RenderTarget && !dev->UseEnhancedBarriers())
{
const D3D12_RESOURCE_ALLOCATION_INFO allocInfo = dev->GetDevice()->GetResourceAllocationInfo(0, 1, &desc);
// We need to use an aliased resource for feedback with legacy barriers.
const D3D12_RESOURCE_ALLOCATION_INFO allocInfo = dev->GetDevice()->GetResourceAllocationInfo(0, 1, &desc.desc);
HRESULT hr = dev->GetAllocator()->AllocateMemory(&allocationDesc, &allocInfo, allocation.put());
if (FAILED(hr))
@@ -202,7 +264,7 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
return {};
}
hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, state,
hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc.desc, GetD3D12ResourceState(state),
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr,
IID_PPV_ARGS(resource.put()));
if (FAILED(hr))
@@ -214,7 +276,7 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
return {};
}
hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
hr = dev->GetAllocator()->CreateAliasingResource(allocation.get(), 0, &desc.desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr,
IID_PPV_ARGS(resource_fbl.put()));
if (FAILED(hr))
@@ -228,9 +290,20 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
}
else
{
HRESULT hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc, state,
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(),
IID_PPV_ARGS(resource.put()));
HRESULT hr;
if (dev->UseEnhancedBarriers())
{
hr = dev->GetAllocator()->CreateResource3(&allocationDesc, &desc.desc1,
type == Type::RenderTarget ? D3D12_BARRIER_LAYOUT_COMMON : GetD3D12BarrierLayout(state),
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr,
0, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
}
else
{
hr = dev->GetAllocator()->CreateResource(&allocationDesc, &desc.desc, GetD3D12ResourceState(state),
(type == Type::RenderTarget || type == Type::DepthStencil) ? &optimized_clear_value : nullptr, allocation.put(),
IID_PPV_ARGS(resource.put()));
}
if (FAILED(hr))
{
// OOM isn't fatal.
@@ -301,8 +374,10 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
return {};
}
// Feedback descriptor used with legacy barriers
if (resource_fbl)
{
pxAssert(!dev->UseEnhancedBarriers());
if (!CreateSRVDescriptor(resource_fbl.get(), levels, srv_format, &fbl_descriptor))
{
dev->GetDescriptorHeapManager().Free(&uav_descriptor);
@@ -325,12 +400,12 @@ std::unique_ptr<GSTexture12> GSTexture12::Create(Type type, Format format, int w
return std::unique_ptr<GSTexture12>(
new GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(resource_fbl), std::move(allocation),
srv_descriptor, write_descriptor, ro_dsv_descriptor, uav_descriptor, fbl_descriptor, write_descriptor_type, state));
srv_descriptor, write_descriptor, ro_dsv_descriptor, uav_descriptor, fbl_descriptor, write_descriptor_type, type == Type::RenderTarget, state));
}
std::unique_ptr<GSTexture12> GSTexture12::Adopt(wil::com_ptr_nothrow<ID3D12Resource> resource, Type type, Format format,
int width, int height, int levels, DXGI_FORMAT dxgi_format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format,
DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, D3D12_RESOURCE_STATES resource_state)
DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, ResourceState resource_state)
{
const D3D12_RESOURCE_DESC desc = resource->GetDesc();
@@ -391,7 +466,7 @@ std::unique_ptr<GSTexture12> GSTexture12::Adopt(wil::com_ptr_nothrow<ID3D12Resou
return std::unique_ptr<GSTexture12>(new GSTexture12(type, format, static_cast<u32>(desc.Width), desc.Height,
desc.MipLevels, desc.Format, std::move(resource), {}, {}, srv_descriptor, write_descriptor, {}, uav_descriptor,
{}, write_descriptor_type, resource_state));
{}, write_descriptor_type, false, resource_state));
}
bool GSTexture12::CreateSRVDescriptor(
@@ -432,7 +507,7 @@ bool GSTexture12::CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT form
return false;
}
const D3D12_DEPTH_STENCIL_VIEW_DESC desc = {format, D3D12_DSV_DIMENSION_TEXTURE2D, read_only ? D3D12_DSV_FLAG_READ_ONLY_DEPTH : D3D12_DSV_FLAG_NONE };
const D3D12_DEPTH_STENCIL_VIEW_DESC desc = {format, D3D12_DSV_DIMENSION_TEXTURE2D, read_only ? D3D12_DSV_FLAG_READ_ONLY_DEPTH : D3D12_DSV_FLAG_NONE};
GSDevice12::GetInstance()->GetDevice()->CreateDepthStencilView(resource, &desc, dh->cpu_handle);
return true;
}
@@ -455,7 +530,7 @@ void* GSTexture12::GetNativeHandle() const
return const_cast<GSTexture12*>(this);
}
ID3D12GraphicsCommandList* GSTexture12::GetCommandBufferForUpdate()
const D3D12CommandList& GSTexture12::GetCommandBufferForUpdate()
{
GSDevice12* const dev = GSDevice12::GetInstance();
if (m_type != Type::Texture || m_use_fence_counter == dev->GetCurrentFenceValue())
@@ -476,10 +551,20 @@ ID3D12Resource* GSTexture12::AllocateUploadStagingBuffer(
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
const GSDevice12::D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};
HRESULT hr;
if (GSDevice12::GetInstance()->UseEnhancedBarriers())
{
hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource3(&allocation_desc, &resource_desc.desc1,
D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
}
else
{
hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc.desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
}
if (FAILED(hr))
{
Console.WriteLn("(AllocateUploadStagingBuffer) CreateCommittedResource() failed with %08X", hr);
@@ -564,14 +649,14 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l
sbuffer.CommitMemory(required_size);
}
ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate();
const D3D12CommandList& cmdlist = GetCommandBufferForUpdate();
GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);
// first time the texture is used? don't leave it undefined
if (m_resource_state == D3D12_RESOURCE_STATE_COMMON)
TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST);
else if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST)
TransitionSubresourceToState(cmdlist, layer, m_resource_state, D3D12_RESOURCE_STATE_COPY_DEST);
if (m_resource_state == GSTexture12::ResourceState::Undefined)
TransitionToState(cmdlist, GSTexture12::ResourceState::CopyDst);
else if (m_resource_state != GSTexture12::ResourceState::CopyDst)
TransitionSubresourceToState(cmdlist, layer, m_resource_state, GSTexture12::ResourceState::CopyDst);
// if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear
if (m_type == Type::RenderTarget)
@@ -588,11 +673,11 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l
dstloc.SubresourceIndex = layer;
const D3D12_BOX srcbox{0u, 0u, 0u, width, height, 1u};
cmdlist->CopyTextureRegion(&dstloc, Common::AlignDownPow2((u32)r.x, block_size),
cmdlist.list4->CopyTextureRegion(&dstloc, Common::AlignDownPow2((u32)r.x, block_size),
Common::AlignDownPow2((u32)r.y, block_size), 0, &srcloc, &srcbox);
if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST)
TransitionSubresourceToState(cmdlist, layer, D3D12_RESOURCE_STATE_COPY_DEST, m_resource_state);
if (m_resource_state != GSTexture12::ResourceState::CopyDst)
TransitionSubresourceToState(cmdlist, layer, GSTexture12::ResourceState::CopyDst, m_resource_state);
if (m_type == Type::Texture)
m_needs_mipmaps_generated |= (layer == 0);
@@ -642,15 +727,15 @@ void GSTexture12::Unmap()
const u32 buffer_offset = buffer.GetCurrentOffset();
buffer.CommitMemory(required_size);
ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate();
const D3D12CommandList& cmdlist = GetCommandBufferForUpdate();
GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", m_map_area.x, m_map_area.y, m_map_area.width(),
m_map_area.height(), m_map_level);
// first time the texture is used? don't leave it undefined
if (m_resource_state == D3D12_RESOURCE_STATE_COMMON)
TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST);
else if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST)
TransitionSubresourceToState(cmdlist, m_map_level, m_resource_state, D3D12_RESOURCE_STATE_COPY_DEST);
if (m_resource_state == ResourceState::Undefined)
TransitionToState(cmdlist, ResourceState::CopyDst);
else if (m_resource_state != ResourceState::CopyDst)
TransitionSubresourceToState(cmdlist, m_map_level, m_resource_state, ResourceState::CopyDst);
// if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear
if (m_type == Type::RenderTarget)
@@ -677,10 +762,10 @@ void GSTexture12::Unmap()
dstloc.SubresourceIndex = m_map_level;
const D3D12_BOX srcbox{0u, 0u, 0u, width, height, 1};
cmdlist->CopyTextureRegion(&dstloc, m_map_area.x, m_map_area.y, 0, &srcloc, &srcbox);
cmdlist.list4->CopyTextureRegion(&dstloc, m_map_area.x, m_map_area.y, 0, &srcloc, &srcbox);
if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST)
TransitionSubresourceToState(cmdlist, m_map_level, D3D12_RESOURCE_STATE_COPY_DEST, m_resource_state);
if (m_resource_state != ResourceState::CopyDst)
TransitionSubresourceToState(cmdlist, m_map_level, ResourceState::CopyDst, m_resource_state);
if (m_type == Type::Texture)
m_needs_mipmaps_generated |= (m_map_level == 0);
@@ -717,55 +802,343 @@ void GSTexture12::SetDebugName(std::string_view name)
#endif
void GSTexture12::TransitionToState(D3D12_RESOURCE_STATES state)
void GSTexture12::TransitionToState(ResourceState state)
{
TransitionToState(GSDevice12::GetInstance()->GetCommandList(), state);
}
void GSTexture12::TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state)
void GSTexture12::TransitionToState(const D3D12CommandList& cmdlist, ResourceState state)
{
if (m_resource_state == state)
return;
// Read only depth requires special handling as we might want to write stencil.
// Also batch the transition barriers as per recommendation from docs.
if (state == (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE))
{
// Transition to read depth/write stencil
const D3D12_RESOURCE_BARRIER barriers[2] = {
{D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_resource.get(), 0, m_resource_state, (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)}}},
{D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_resource.get(), 1, m_resource_state, D3D12_RESOURCE_STATE_DEPTH_WRITE}}},
};
cmdlist->ResourceBarrier(m_resource_state == D3D12_RESOURCE_STATE_DEPTH_WRITE ? 1 : 2, barriers);
}
else if (m_resource_state == (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE))
{
// Transition from read depth/write stencil
const D3D12_RESOURCE_BARRIER barriers[2] = {
{D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_resource.get(), 0, (D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), state}}},
{D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_resource.get(), 1, D3D12_RESOURCE_STATE_DEPTH_WRITE, state}}},
};
cmdlist->ResourceBarrier(state == D3D12_RESOURCE_STATE_DEPTH_WRITE ? 1 : 2, barriers);
}
else
{
// Normal transition
TransitionSubresourceToState(cmdlist, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, m_resource_state, state);
}
TransitionSubresourceToState(cmdlist, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, m_resource_state, state);
m_resource_state = state;
}
void GSTexture12::TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, int level,
D3D12_RESOURCE_STATES before_state, D3D12_RESOURCE_STATES after_state) const
void GSTexture12::TransitionSubresourceToState(const D3D12CommandList& cmdlist, int level,
ResourceState before_state, ResourceState after_state) const
{
const D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_resource.get(), static_cast<u32>(level), before_state, after_state}}};
cmdlist->ResourceBarrier(1, &barrier);
if (GSDevice12::GetInstance()->UseEnhancedBarriers())
{
// Read only depth requires special handling as we might want to write stencil.
// We need to transition subresources separately, requiring 2 barriers.
// Handling it here allows us to batch those barriers.
// Other transitions only need the one barrier.
D3D12_TEXTURE_BARRIER barriers[2] = {{D3D12_BARRIER_SYNC_NONE, D3D12_BARRIER_SYNC_NONE,
D3D12_BARRIER_ACCESS_COMMON, D3D12_BARRIER_ACCESS_COMMON,
D3D12_BARRIER_LAYOUT_COMMON, D3D12_BARRIER_LAYOUT_COMMON,
m_resource.get(), {static_cast<u32>(level), 0, 0, 0, 0, 0}, D3D12_TEXTURE_BARRIER_FLAG_NONE}};
uint num_barriers = 1;
D3D12_TEXTURE_BARRIER& barrier = barriers[0];
switch (before_state)
{
case ResourceState::Undefined:
case ResourceState::Present:
barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_COMMON;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE;
break;
case ResourceState::RenderTarget:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_RENDER_TARGET;
barrier.AccessBefore = m_simultaneous_tex ?
D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE :
D3D12_BARRIER_ACCESS_RENDER_TARGET;
barrier.SyncBefore = m_simultaneous_tex ?
D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING :
D3D12_BARRIER_SYNC_RENDER_TARGET;
break;
case ResourceState::DepthWriteStencil:
pxAssert(!m_simultaneous_tex);
barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
barrier.SyncBefore = D3D12_BARRIER_SYNC_DEPTH_STENCIL;
break;
case ResourceState::DepthReadStencil:
pxAssert(!m_simultaneous_tex);
pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
barriers[0].Subresources = {0, static_cast<uint>(m_mipmap_levels), 0, 1, 0, 1};
barriers[0].LayoutBefore = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ;
barriers[0].AccessBefore = D3D12_BARRIER_ACCESS_SHADER_RESOURCE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
barriers[0].SyncBefore = D3D12_BARRIER_SYNC_DEPTH_STENCIL | D3D12_BARRIER_SYNC_PIXEL_SHADING;
if (after_state != ResourceState::DepthWriteStencil)
{
num_barriers = 2;
barriers[1].Subresources = {0, static_cast<uint>(m_mipmap_levels), 0, 1, 1, 1};
barriers[1].LayoutBefore = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
barriers[1].AccessBefore = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
barriers[1].SyncBefore = D3D12_BARRIER_SYNC_DEPTH_STENCIL;
}
break;
case ResourceState::PixelShaderResource:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
barrier.SyncBefore = D3D12_BARRIER_SYNC_PIXEL_SHADING;
break;
case ResourceState::ComputeShaderResource:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
barrier.SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING;
break;
case ResourceState::CopySrc:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_COPY_SOURCE;
barrier.SyncBefore = D3D12_BARRIER_SYNC_COPY;
break;
case ResourceState::CopyDst:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_COPY_DEST;
barrier.SyncBefore = D3D12_BARRIER_SYNC_COPY;
break;
case ResourceState::CASShaderUAV:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
barrier.SyncBefore = D3D12_BARRIER_SYNC_COMPUTE_SHADING;
break;
case ResourceState::PixelShaderUAV:
barrier.LayoutBefore = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
barrier.SyncBefore = D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
break;
default:
pxAssert(false);
barrier.LayoutBefore = D3D12_BARRIER_LAYOUT_UNDEFINED;
barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE;
break;
}
switch (after_state)
{
case ResourceState::Undefined:
case ResourceState::Present:
barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_COMMON;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
barrier.SyncAfter = D3D12_BARRIER_SYNC_NONE;
break;
case ResourceState::RenderTarget:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_RENDER_TARGET;
barrier.AccessAfter = m_simultaneous_tex ?
D3D12_BARRIER_ACCESS_RENDER_TARGET | D3D12_BARRIER_ACCESS_SHADER_RESOURCE :
D3D12_BARRIER_ACCESS_RENDER_TARGET;
barrier.SyncAfter = m_simultaneous_tex ?
D3D12_BARRIER_SYNC_RENDER_TARGET | D3D12_BARRIER_SYNC_PIXEL_SHADING :
D3D12_BARRIER_SYNC_RENDER_TARGET;
break;
case ResourceState::DepthWriteStencil:
pxAssert(!m_simultaneous_tex);
barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE;
barrier.SyncAfter = D3D12_BARRIER_SYNC_DEPTH_STENCIL;
break;
case ResourceState::DepthReadStencil:
pxAssert(!m_simultaneous_tex);
pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
barriers[0].Subresources = {0, static_cast<uint>(m_mipmap_levels), 0, 1, 0, 1};
barriers[0].LayoutAfter = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ;
barriers[0].AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE | D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
barriers[0].SyncAfter = D3D12_BARRIER_SYNC_DEPTH_STENCIL | D3D12_BARRIER_SYNC_PIXEL_SHADING;
if (before_state != ResourceState::DepthWriteStencil)
{
num_barriers = 2;
barriers[1].Subresources = {0, static_cast<uint>(m_mipmap_levels), 0, 1, 1, 1};
barriers[1].LayoutAfter = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE;
barriers[1].AccessAfter = D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ;
barriers[1].SyncAfter = D3D12_BARRIER_SYNC_DEPTH_STENCIL;
}
break;
case ResourceState::PixelShaderResource:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
barrier.SyncAfter = D3D12_BARRIER_SYNC_PIXEL_SHADING;
break;
case ResourceState::ComputeShaderResource:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_SHADER_RESOURCE;
barrier.SyncAfter = D3D12_BARRIER_SYNC_COMPUTE_SHADING;
break;
case ResourceState::CopySrc:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_COPY_SOURCE;
barrier.SyncAfter = D3D12_BARRIER_SYNC_COPY;
break;
case ResourceState::CopyDst:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_COPY_DEST;
barrier.SyncAfter = D3D12_BARRIER_SYNC_COPY;
break;
case ResourceState::CASShaderUAV:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
barrier.SyncAfter = D3D12_BARRIER_SYNC_COMPUTE_SHADING;
break;
case ResourceState::PixelShaderUAV:
barrier.LayoutAfter = m_simultaneous_tex ? D3D12_BARRIER_LAYOUT_COMMON : D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_UNORDERED_ACCESS;
barrier.SyncAfter = D3D12_BARRIER_SYNC_PIXEL_SHADING | D3D12_BARRIER_SYNC_CLEAR_UNORDERED_ACCESS_VIEW;
break;
default:
pxAssert(false);
barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_UNDEFINED;
barrier.AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
barrier.SyncAfter = D3D12_BARRIER_SYNC_NONE;
break;
}
if (num_barriers == 2)
{
barriers[1].pResource = m_resource.get();
barriers[1].Flags = barriers[0].Flags;
if (before_state == ResourceState::DepthReadStencil)
{
barriers[1].LayoutAfter = barriers[0].LayoutAfter;
barriers[1].AccessAfter = barriers[0].AccessAfter;
barriers[1].SyncAfter = barriers[0].SyncAfter;
}
else // after_state == ResourceState::DepthReadStencil
{
barriers[1].LayoutBefore = barriers[0].LayoutBefore;
barriers[1].AccessBefore = barriers[0].AccessBefore;
barriers[1].SyncBefore = barriers[0].SyncBefore;
}
}
const D3D12_BARRIER_GROUP group = {.Type = D3D12_BARRIER_TYPE_TEXTURE, .NumBarriers = num_barriers, .pTextureBarriers = barriers};
cmdlist.list7->Barrier(1, &group);
}
else
{
// Read only depth requires special handling as we might want to write stencil.
// We need to transition subresources separately, requiring 2 barriers.
// Handling it here allows us to batch those barriers.
// Other transitions only need the one barrier.
D3D12_RESOURCE_BARRIER barriers[2] = {{D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_resource.get(), static_cast<u32>(level), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COMMON}}}};
int num_barriers = 1;
D3D12_RESOURCE_BARRIER& barrier = barriers[0];
switch (before_state)
{
case ResourceState::Undefined:
case ResourceState::Present:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;
break;
case ResourceState::RenderTarget:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
break;
case ResourceState::DepthWriteStencil:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE;
break;
case ResourceState::DepthReadStencil:
pxAssert(!m_simultaneous_tex);
pxAssert(m_mipmap_levels == 1);
pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
barriers[0].Transition.Subresource = 0;
barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_DEPTH_READ;
if (after_state != ResourceState::DepthWriteStencil)
{
num_barriers = 2;
barriers[1].Transition.Subresource = 1;
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE;
}
break;
case ResourceState::PixelShaderResource:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
break;
case ResourceState::ComputeShaderResource:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
break;
case ResourceState::CopySrc:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
break;
case ResourceState::CopyDst:
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
break;
case ResourceState::CASShaderUAV:
case ResourceState::PixelShaderUAV:
// Handled in after_state cases.
if (after_state == ResourceState::CASShaderUAV || after_state == ResourceState::PixelShaderUAV)
break;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
break;
default:
pxAssert(false);
break;
}
switch (after_state)
{
case ResourceState::Undefined:
case ResourceState::Present:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
break;
case ResourceState::RenderTarget:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
break;
case ResourceState::DepthWriteStencil:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;
break;
case ResourceState::DepthReadStencil:
pxAssert(!m_simultaneous_tex);
pxAssert(m_mipmap_levels == 1);
pxAssert(level == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
barriers[0].Transition.Subresource = 0;
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_DEPTH_READ;
if (before_state != ResourceState::DepthWriteStencil)
{
num_barriers = 2;
barriers[1].Transition.Subresource = 1;
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;
}
break;
case ResourceState::PixelShaderResource:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
break;
case ResourceState::ComputeShaderResource:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
break;
case ResourceState::CopySrc:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
break;
case ResourceState::CopyDst:
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
break;
case ResourceState::CASShaderUAV:
case ResourceState::PixelShaderUAV:
if (before_state == ResourceState::CASShaderUAV || before_state == ResourceState::PixelShaderUAV)
{
// No state transition, but probably want a barrier instead.
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
// pResource is a common initial member, so no need to set again.
}
else
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
break;
default:
pxAssert(false);
break;
}
if (num_barriers == 2)
{
barriers[1].Transition.pResource = m_resource.get();
barriers[1].Type = barriers[0].Type;
barriers[1].Flags = barriers[0].Flags;
if (before_state == ResourceState::DepthReadStencil)
barriers[1].Transition.StateAfter = barriers[0].Transition.StateAfter;
else // after_state == ResourceState::DepthReadStencil
barriers[1].Transition.StateBefore = barriers[0].Transition.StateBefore;
}
cmdlist.list4->ResourceBarrier(num_barriers, barriers);
}
}
void GSTexture12::CommitClear()
@@ -774,22 +1147,21 @@ void GSTexture12::CommitClear()
return;
GSDevice12::GetInstance()->EndRenderPass();
CommitClear(GSDevice12::GetInstance()->GetCommandList());
}
void GSTexture12::CommitClear(ID3D12GraphicsCommandList* cmdlist)
void GSTexture12::CommitClear(const D3D12CommandList& cmdlist)
{
if (IsDepthStencil())
{
TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
cmdlist->ClearDepthStencilView(
TransitionToState(cmdlist, ResourceState::DepthWriteStencil);
cmdlist.list4->ClearDepthStencilView(
GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, m_clear_value.depth, 0, 0, nullptr);
}
else
{
TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
cmdlist->ClearRenderTargetView(GetWriteDescriptor(), GSVector4::unorm8(m_clear_value.color).v, 0, nullptr);
TransitionToState(cmdlist, ResourceState::RenderTarget);
cmdlist.list4->ClearRenderTargetView(GetWriteDescriptor(), GSVector4::unorm8(m_clear_value.color).v, 0, nullptr);
}
SetState(GSTexture::State::Dirty);
@@ -816,14 +1188,23 @@ std::unique_ptr<GSDownloadTexture12> GSDownloadTexture12::Create(u32 width, u32
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
const GSDevice12::D3D12_RESOURCE_DESCU resource_desc = {{D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}};
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
wil::com_ptr_nothrow<ID3D12Resource> buffer;
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc,
D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
HRESULT hr;
if (GSDevice12::GetInstance()->UseEnhancedBarriers())
{
hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource3(&allocation_desc, &resource_desc.desc1,
D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, 0, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
}
else
{
hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc.desc,
D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
}
if (FAILED(hr))
{
Console.Error("(GSDownloadTexture12::Create) CreateResource() failed with HRESULT %08X", hr);
@@ -861,7 +1242,7 @@ void GSDownloadTexture12::CopyFromTexture(
if (IsMapped())
Unmap();
ID3D12GraphicsCommandList* cmdlist = GSDevice12::GetInstance()->GetCommandList();
const D3D12CommandList& cmdlist = GSDevice12::GetInstance()->GetCommandList();
GL_INS("ReadbackTexture: {%d,%d} %ux%u", src.left, src.top, src.width(), src.height());
D3D12_TEXTURE_COPY_LOCATION srcloc;
@@ -879,17 +1260,17 @@ void GSDownloadTexture12::CopyFromTexture(
dstloc.PlacedFootprint.Footprint.Depth = 1;
dstloc.PlacedFootprint.Footprint.RowPitch = m_current_pitch;
const D3D12_RESOURCE_STATES old_layout = tex12->GetResourceState();
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
tex12->TransitionSubresourceToState(cmdlist, src_level, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE);
const GSTexture12::ResourceState old_layout = tex12->GetResourceState();
if (old_layout != GSTexture12::ResourceState::CopySrc)
tex12->TransitionSubresourceToState(cmdlist, src_level, old_layout, GSTexture12::ResourceState::CopySrc);
// TODO: Rules for depth buffers here?
const D3D12_BOX srcbox{static_cast<UINT>(src.left), static_cast<UINT>(src.top), 0u, static_cast<UINT>(src.right),
static_cast<UINT>(src.bottom), 1u};
cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox);
cmdlist.list4->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox);
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
tex12->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout);
if (old_layout != GSTexture12::ResourceState::CopySrc)
tex12->TransitionSubresourceToState(cmdlist, src_level, GSTexture12::ResourceState::CopySrc, old_layout);
m_copy_fence_value = GSDevice12::GetInstance()->GetCurrentFenceValue();
m_needs_flush = true;

View File

@@ -16,9 +16,27 @@ namespace D3D12MA
class Allocation;
}
struct D3D12CommandList;
class GSTexture12 final : public GSTexture
{
public:
// Logical state a GSTexture12 resource is tracked in (see m_resource_state / GetResourceState()).
// The texture code transitions between these values instead of raw D3D12_RESOURCE_STATES.
// The per-value notes below describe the legacy ResourceBarrier mapping only.
enum class ResourceState : u32
{
// Default for m_resource_state. Legacy barriers treat it as D3D12_RESOURCE_STATE_COMMON.
Undefined,
// Legacy barriers also map this to D3D12_RESOURCE_STATE_COMMON.
Present,
// D3D12_RESOURCE_STATE_RENDER_TARGET; required before ClearRenderTargetView in CommitClear().
RenderTarget,
// D3D12_RESOURCE_STATE_DEPTH_WRITE; required before ClearDepthStencilView in CommitClear().
DepthWriteStencil,
// Read-only depth while stencil may still be written. With legacy barriers subresource 0 is
// PIXEL_SHADER_RESOURCE | DEPTH_READ and subresource 1 stays DEPTH_WRITE, so entering or leaving
// this state can need two barriers. Only valid for single-mip, non-simultaneous textures
// transitioned as a whole (asserted in the transition code).
DepthReadStencil,
// D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE.
PixelShaderResource,
// D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE.
ComputeShaderResource,
// D3D12_RESOURCE_STATE_COPY_SOURCE.
CopySrc,
// D3D12_RESOURCE_STATE_COPY_DEST.
CopyDst,
// Both UAV values map to D3D12_RESOURCE_STATE_UNORDERED_ACCESS; moving from one UAV value to
// another emits a UAV barrier instead of a state transition.
CASShaderUAV, // No Clear UAV Sync
PixelShaderUAV,
// Sentinel after the last real state; not a valid transition source or target
// (the legacy transition switch asserts on it). Presumably the number of states - confirm at use sites.
Count
};
~GSTexture12() override;
static std::unique_ptr<GSTexture12> Create(Type type, Format format, int width, int height, int levels,
@@ -26,14 +44,14 @@ public:
DXGI_FORMAT uav_format);
static std::unique_ptr<GSTexture12> Adopt(wil::com_ptr_nothrow<ID3D12Resource> resource, Type type, Format format,
int width, int height, int levels, DXGI_FORMAT dxgi_format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format,
DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, D3D12_RESOURCE_STATES resource_state);
DXGI_FORMAT dsv_format, DXGI_FORMAT uav_format, ResourceState resource_state);
__fi const D3D12DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; }
__fi const D3D12DescriptorHandle& GetWriteDescriptor() const { return m_write_descriptor; }
__fi const D3D12DescriptorHandle& GetReadDepthViewDescriptor() const { return m_read_dsv_descriptor; }
__fi const D3D12DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; }
__fi const D3D12DescriptorHandle& GetFBLDescriptor() const { return m_fbl_descriptor; }
__fi D3D12_RESOURCE_STATES GetResourceState() const { return m_resource_state; }
__fi ResourceState GetResourceState() const { return m_resource_state; }
__fi DXGI_FORMAT GetDXGIFormat() const { return m_dxgi_format; }
__fi ID3D12Resource* GetResource() const { return m_resource.get(); }
__fi ID3D12Resource* GetFBLResource() const { return m_resource_fbl.get(); }
@@ -49,15 +67,15 @@ public:
void SetDebugName(std::string_view name) override;
#endif
void TransitionToState(D3D12_RESOURCE_STATES state);
void TransitionToState(ResourceState state);
void CommitClear();
void CommitClear(ID3D12GraphicsCommandList* cmdlist);
void CommitClear(const D3D12CommandList& cmdlist);
void Destroy(bool defer = true);
void TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state);
void TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, int level, D3D12_RESOURCE_STATES before_state,
D3D12_RESOURCE_STATES after_state) const;
void TransitionToState(const D3D12CommandList&, ResourceState state);
void TransitionSubresourceToState(const D3D12CommandList& cmdlist, int level, ResourceState before_state,
ResourceState after_state) const;
// Call when the texture is bound to the pipeline, or read from in a copy.
__fi void SetUseFenceCounter(u64 val) { m_use_fence_counter = val; }
@@ -75,7 +93,7 @@ private:
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation, const D3D12DescriptorHandle& srv_descriptor,
const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& ro_dsv_descriptor,
const D3D12DescriptorHandle& uav_descriptor, const D3D12DescriptorHandle& fbl_descriptor,
WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state);
WriteDescriptorType wdtype, bool simultaneous_texture, ResourceState resource_state);
static bool CreateSRVDescriptor(
ID3D12Resource* resource, u32 levels, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
@@ -83,7 +101,7 @@ private:
static bool CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh, bool read_only);
static bool CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
ID3D12GraphicsCommandList* GetCommandBufferForUpdate();
const D3D12CommandList& GetCommandBufferForUpdate();
ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const;
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
@@ -99,7 +117,11 @@ private:
WriteDescriptorType m_write_descriptor_type = WriteDescriptorType::None;
DXGI_FORMAT m_dxgi_format = DXGI_FORMAT_UNKNOWN;
D3D12_RESOURCE_STATES m_resource_state = D3D12_RESOURCE_STATE_COMMON;
ResourceState m_resource_state = ResourceState::Undefined;
// With legacy barriers, an aliased resource is used as the feedback shader resource.
// With enhanced barriers, the layout is always COMMON, but can use the main resource for feedback.
bool m_simultaneous_tex;
// Contains the fence counter when the texture was last used.
// When this matches the current fence counter, the texture was used this command buffer.

View File

@@ -165,11 +165,12 @@ GSTexture* GSRendererHW::GetOutput(int i, float& scale, int& y_offset)
if (GSTextureCache::Target* rt = g_texture_cache->LookupDisplayTarget(TEX0, framebufferSize, GetTextureScaleFactor(), false))
{
const u32 bp_adj = (TEX0.TBP0 < rt->m_TEX0.TBP0 && rt->UnwrappedEndBlock() > GS_MAX_BLOCKS) ? (TEX0.TBP0 + GS_MAX_BLOCKS) : TEX0.TBP0;
rt->Update();
t = rt->m_texture;
scale = rt->m_scale;
const int delta = TEX0.TBP0 - rt->m_TEX0.TBP0;
const int delta = bp_adj - rt->m_TEX0.TBP0;
if (delta > 0 && curFramebuffer.FBW != 0)
{
const int pages = delta >> 5u;
@@ -2338,6 +2339,12 @@ void GSRendererHW::Draw()
return;
}
// Sometimes everything will get reset and it will draw a single black point in the top left corner,
// which can cause invalid targets to be created, so might as well skip it.
if (GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).eq(GSVector4i::zero()) && m_vt.m_eq.rgba == 0xffff &&
m_vt.m_max.c.rgba32() == 0 && m_draw_env->PRIM.PRIM == GS_POINTLIST && m_env.PRIM.PRIM != GS_POINTLIST)
return;
// Channel shuffles repeat lots of draws. Get out early if we can.
if (m_channel_shuffle)
{
@@ -2844,7 +2851,7 @@ void GSRendererHW::Draw()
const bool page_aligned = (m_r.w % pgs.y) == (pgs.y - 1) || (m_r.w % pgs.y) == 0;
const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color && page_aligned);
const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth && page_aligned);
bool gs_mem_cleared = false;
// If it's an invalid-sized draw, do the mem clear on the CPU, we don't want to create huge targets.
// If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth.
if (is_zero_color_clear || is_zero_depth_clear || height_invalid)
@@ -2876,7 +2883,7 @@ void GSRendererHW::Draw()
{
g_texture_cache->InvalidateTemporaryZ();
}
gs_mem_cleared |= overwriting_whole_rt && overwriting_whole_ds && (!no_rt || !no_ds);
if (overwriting_whole_rt && overwriting_whole_ds &&
TryGSMemClear(no_rt, preserve_rt_color, is_zero_color_clear, rt_end_bp,
no_ds, preserve_depth, is_zero_depth_clear, ds_end_bp))
@@ -2906,6 +2913,27 @@ void GSRendererHW::Draw()
return;
}
}
// If not a zero clear or the RTs aren't fully overwritten, we need to see if this is clearing for a future operation.
// So if the FBP or Z being cleared isn't getting used next frame, clear the actual GS memory.
if (!gs_mem_cleared)
{
const int get_next_ctx = m_env.PRIM.CTXT;
const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx];
if ((!no_rt && next_ctx.FRAME.FBP != m_cached_ctx.FRAME.FBP) || (!no_ds && next_ctx.ZBUF.ZBP != m_cached_ctx.ZBUF.ZBP))
{
bool frame_masked = no_rt || (m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk) || !IsOpaque() || !IsRTWritten();
const bool z_masked = no_ds || m_cached_ctx.ZBUF.ZMSK;
if (frame_masked && m_cached_ctx.FRAME.PSM == PSMCT32 && m_cached_ctx.FRAME.FBMSK == 0xFF000000u)
{
frame_masked = no_rt || !IsOpaque() || !IsRTWritten();
}
// Force clear of memory but don't invalidate anything.
TryGSMemClear(frame_masked, false, false, 0, z_masked, false, false, 0);
}
}
}
GIFRegTEX0 TEX0 = {};
@@ -3215,6 +3243,7 @@ void GSRendererHW::Draw()
float target_scale = GetTextureScaleFactor();
bool scaled_copy = false;
int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound);
m_downscale_source = false;
if (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off)
{
@@ -3248,8 +3277,6 @@ void GSRendererHW::Draw()
scale_draw = 1;
scaled_copy = true;
}
m_downscale_source = false;
}
}
@@ -8935,17 +8962,17 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
if (m_r.width() < ((static_cast<int>(m_cached_ctx.FRAME.FBW) - 1) * 64))
return false;
if (!no_rt && !preserve_rt)
if (!no_rt && (!preserve_rt || (IsOpaque() && m_cached_ctx.FRAME.FBMSK)))
{
ClearGSLocalMemory(m_context->offset.fb, m_r, GetConstantDirectWriteMemClearColor());
if (invalidate_rt)
if (invalidate_rt && !preserve_rt)
{
g_texture_cache->InvalidateVideoMem(m_context->offset.fb, m_r, false);
g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r),
rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW);
rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.FBMSK);
GSUploadQueue clear_queue;
clear_queue.draw = s_n;
@@ -8956,6 +8983,13 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
clear_queue.zero_clear = true;
m_draw_transfers.push_back(clear_queue);
}
else
{
g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r),
rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.FBMSK, true);
}
}
if (!no_ds && !preserve_z)
@@ -8969,6 +9003,15 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r),
ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW);
GSUploadQueue clear_queue;
clear_queue.draw = s_n;
clear_queue.rect = m_r;
clear_queue.blit.DBP = m_cached_ctx.ZBUF.Block();
clear_queue.blit.DBW = m_cached_ctx.FRAME.FBW;
clear_queue.blit.DPSM = m_cached_ctx.ZBUF.PSM;
clear_queue.zero_clear = true;
m_draw_transfers.push_back(clear_queue);
}
}
@@ -8987,6 +9030,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
const int right = r.right;
const int bottom = r.bottom;
int top = r.top;
u32 drawing_mask = GSLocalMemory::m_psm[psm].depth ? 0x0 : m_cached_ctx.FRAME.FBMSK;
// Process the page aligned region first, then fall back to anything which is not.
// Since pages are linear in memory, we can do it basically with a vector memset.
@@ -9002,22 +9046,34 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
if (format == GSLocalMemory::PSM_FMT_32)
{
const GSVector4i vcolor = GSVector4i(vert_color);
const GSVector4i vcolor = GSVector4i(vert_color & ~drawing_mask);
const u32 iterations_per_page = (pages_wide * pixels_per_page) / 4;
const GSVector4i mask = GSVector4i(drawing_mask);
pxAssert((off.bp() & (GS_BLOCKS_PER_PAGE - 1)) == 0);
for (u32 current_page = off.bp() >> 5; top < page_aligned_bottom; top += pgs.y, current_page += fbw)
{
current_page &= (GS_MAX_PAGES - 1);
GSVector4i* ptr = reinterpret_cast<GSVector4i*>(m_mem.vm8() + current_page * GS_PAGE_SIZE);
GSVector4i* const ptr_end = ptr + iterations_per_page;
while (ptr != ptr_end)
*(ptr++) = vcolor;
if (drawing_mask)
{
while (ptr != ptr_end)
{
*ptr = (*ptr & mask) | vcolor;
ptr++;
}
}
else
{
while (ptr != ptr_end)
*(ptr++) = vcolor;
}
}
}
else if (format == GSLocalMemory::PSM_FMT_24)
{
const GSVector4i mask = GSVector4i::xff000000();
const GSVector4i vcolor = GSVector4i(vert_color & 0x00ffffffu);
const GSVector4i mask = GSVector4i::xff000000() | GSVector4i(drawing_mask);
const GSVector4i vcolor = GSVector4i((vert_color & 0x00ffffffu) & ~drawing_mask);
const u32 iterations_per_page = (pages_wide * pixels_per_page) / 4;
pxAssert((off.bp() & (GS_BLOCKS_PER_PAGE - 1)) == 0);
for (u32 current_page = off.bp() >> 5; top < page_aligned_bottom; top += pgs.y, current_page += fbw)
@@ -9036,7 +9092,10 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
{
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) |
((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
const u16 converted_mask = ((drawing_mask >> 16) & 0x8000) | ((drawing_mask >> 9) & 0x7C00) |
((drawing_mask >> 6) & 0x7E0) | ((drawing_mask >> 3) & 0x1F);
const GSVector4i vcolor = GSVector4i::broadcast16(converted_color);
const GSVector4i mask = GSVector4i::broadcast16(converted_mask);
const u32 iterations_per_page = (pages_wide * pixels_per_page) / 8;
pxAssert((off.bp() & (GS_BLOCKS_PER_PAGE - 1)) == 0);
for (u32 current_page = off.bp() >> 5; top < page_aligned_bottom; top += pgs.y, current_page += fbw)
@@ -9044,14 +9103,27 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
current_page &= (GS_MAX_PAGES - 1);
GSVector4i* ptr = reinterpret_cast<GSVector4i*>(m_mem.vm8() + current_page * GS_PAGE_SIZE);
GSVector4i* const ptr_end = ptr + iterations_per_page;
while (ptr != ptr_end)
*(ptr++) = vcolor;
if (converted_mask)
{
while (ptr != ptr_end)
{
*ptr = (*ptr & mask) | vcolor;
ptr++;
}
}
else
{
while (ptr != ptr_end)
*(ptr++) = vcolor;
}
}
}
}
if (format == GSLocalMemory::PSM_FMT_32)
{
const u32 mask = drawing_mask;
const u32 vcolor = vert_color & ~mask;
// Based on WritePixel32
u32* vm = m_mem.vm32();
for (int y = top; y < bottom; y++)
@@ -9059,25 +9131,28 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
for (int x = left; x < right; x++)
vm[pa.value(x)] = vert_color;
vm[pa.value(x)] = vcolor | (vm[pa.value(x)] & mask);
}
}
else if (format == GSLocalMemory::PSM_FMT_24)
{
// Based on WritePixel24
u32* vm = m_mem.vm32();
const u32 write_color = vert_color & 0xffffffu;
const u32 mask = drawing_mask | 0xff000000u;
const u32 write_color = (vert_color & 0xffffffu) & ~mask;
for (int y = top; y < bottom; y++)
{
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle32).paMulti(0, y);
for (int x = left; x < right; x++)
vm[pa.value(x)] = (vm[pa.value(x)] & 0xff000000u) | write_color;
vm[pa.value(x)] = (vm[pa.value(x)] & mask) | write_color;
}
}
else if (format == GSLocalMemory::PSM_FMT_16)
{
const u16 converted_color = ((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F);
const u16 converted_mask = ((drawing_mask >> 16) & 0x8000) | ((drawing_mask >> 9) & 0x7C00) |
((drawing_mask >> 6) & 0x7E0) | ((drawing_mask >> 3) & 0x1F);
const u16 converted_color = (((vert_color >> 16) & 0x8000) | ((vert_color >> 9) & 0x7C00) | ((vert_color >> 6) & 0x7E0) | ((vert_color >> 3) & 0x1F)) & ~converted_mask;
// Based on WritePixel16
u16* vm = m_mem.vm16();
@@ -9086,7 +9161,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
GSOffset::PAHelper pa = off.assertSizesMatch(GSLocalMemory::swizzle16).paMulti(0, y);
for (int x = left; x < right; x++)
vm[pa.value(x)] = converted_color;
vm[pa.value(x)] = converted_color | (vm[pa.value(x)] & converted_mask);
}
}
}

View File

@@ -1788,8 +1788,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
(GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp >= 16)) && // Channel shuffles or non indexed lookups.
t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/)
{
const u32 end_block = GSLocalMemory::GetEndBlockAddress(bp, TEX0.TBW, TEX0.PSM, r);
const u32 adj_bp = (end_block < t->m_TEX0.TBP0 && t->UnwrappedEndBlock() > GS_MAX_BLOCKS) ? (bp + GS_MAX_BLOCKS) : bp;
u32 rt_tbw = std::max(1U, t->m_TEX0.TBW);
u32 horz_page_offset = ((bp - t->m_TEX0.TBP0) >> 5) % rt_tbw;
u32 horz_page_offset = ((adj_bp - t->m_TEX0.TBP0) >> 5) % rt_tbw;
if (GSLocalMemory::m_psm[psm].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && bw != rt_tbw && block_boundary_rect.height() > GSLocalMemory::m_psm[psm].pgs.y)
continue;
@@ -1802,7 +1804,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
((t->m_TEX0.TBW < (horz_page_offset + ((block_boundary_rect.z + GSLocalMemory::m_psm[psm].pgs.x - 1) / GSLocalMemory::m_psm[psm].pgs.x)) ||
(t->m_TEX0.TBW != bw && block_boundary_rect.w > GSLocalMemory::m_psm[psm].pgs.y))))
{
DbgCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
DbgCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", adj_bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
continue;
}
// Keep note that 2 bw is basically 1 normal page, as bw is in 64 pixels, and 8bit pages are 128 pixels wide, aka 2 bw.
@@ -1814,7 +1816,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
((GSLocalMemory::m_psm[psm].bpp == 32) ? bw : ((bw + 1) / 2)) <= t->m_TEX0.TBW) &&
!(((GSLocalMemory::m_psm[psm].bpp == 32) ? bw : ((bw + 1) / 2)) == rt_tbw)))))
{
DbgCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
DbgCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", adj_bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
continue;
}
else if (!possible_shuffle && GSLocalMemory::m_psm[psm].bpp <= 8 && TEX0.TBW == 1)
@@ -1852,16 +1854,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
else // Formats are not compatible for normal draws, only shuffles.
continue;
}
if (bp > t->m_TEX0.TBP0)
if (adj_bp > t->m_TEX0.TBP0)
{
if (!region.HasEither() && GSLocalMemory::m_psm[psm].bpp == 32 && (t->m_TEX0.TBW - (((bp - t->m_TEX0.TBP0) >> 5) % rt_tbw)) < static_cast<u32>((block_boundary_rect.width() + 63) / 64))
if (!region.HasEither() && GSLocalMemory::m_psm[psm].bpp == 32 && (t->m_TEX0.TBW - (((adj_bp - t->m_TEX0.TBP0) >> 5) % rt_tbw)) < static_cast<u32>((block_boundary_rect.width() + 63) / 64))
{
DbgCon.Warning("Bad alignmenet");
continue;
}
// Make sure it's inside if not a shuffle, sometimes valid areas can get messy, like TOCA Race Driver 2 where it goes over to 480, but it's rounded up to 512 in the shuffle.
if (!possible_shuffle && !t->Inside(bp, bw, psm, block_boundary_rect))
if (!possible_shuffle && !t->Inside(adj_bp, bw, psm, block_boundary_rect))
continue;
GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (psm & 0x7) != PSMCT16) ? block_boundary_rect : rect;
@@ -1871,7 +1873,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
// Hitman Blood Money is an example of this in the theatre.
const u32 rt_tbw = (possible_shuffle || bw == 1 || GSUtil::GetChannelMask(psm) != 0x8 || frame.FBW <= bw || frame.FBW == t->m_TEX0.TBW || bw == t->m_TEX0.TBW) ? t->m_TEX0.TBW : frame.FBW;
const bool can_translate = CanTranslate(bp, bw, src_psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, rt_tbw);
const bool can_translate = CanTranslate(adj_bp, bw, src_psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, rt_tbw);
if (can_translate)
{
const bool swizzle_match = GSLocalMemory::m_psm[src_psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
@@ -1881,7 +1883,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (swizzle_match)
{
rect = TranslateAlignedRectByPage(t->m_TEX0.TBP0, t->m_end_block, rt_tbw, t->m_TEX0.PSM, t->m_valid, bp, src_psm, bw, new_rect);
rect = TranslateAlignedRectByPage(t->m_TEX0.TBP0, t->m_end_block, rt_tbw, t->m_TEX0.PSM, t->m_valid, adj_bp, src_psm, bw, new_rect);
rect.x -= new_rect.x;
rect.y -= new_rect.y;
}
@@ -1901,7 +1903,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
}
rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), src_psm, bw, new_rect);
rect = TranslateAlignedRectByPage(t, adj_bp & ~((1 << 5) - 1), src_psm, bw, new_rect);
rect.x -= new_rect.x & ~(page_size.x - 1);
rect.y -= new_rect.y & ~(page_size.y - 1);
}
@@ -1933,7 +1935,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
}
else
{
SurfaceOffset so = ComputeSurfaceOffset(bp, bw, src_psm, new_rect, t);
SurfaceOffset so = ComputeSurfaceOffset(adj_bp, bw, src_psm, new_rect, t);
if (!so.is_valid && t->Wraps())
{
// Improves Beyond Good & Evil shadow.
@@ -2692,10 +2694,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
for (auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i;
const u32 end_block = GSLocalMemory::GetEndBlockAddress(bp, TEX0.TBW, TEX0.PSM, GSVector4i(0, size.y, size.x, size.y + 1));
const u32 bp_adj = (end_block < t->m_TEX0.TBP0 && t->UnwrappedEndBlock() > GS_MAX_BLOCKS) ? (bp + GS_MAX_BLOCKS) : bp;
const bool half_buffer_match = GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && TEX0.TBW == t->m_TEX0.TBW && TEX0.PSM == t->m_TEX0.PSM &&
bp == GSLocalMemory::GetStartBlockAddress(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, GSVector4i(0, size.y, size.x, size.y + 1));
// Make sure the target is inside the texture
if (t->m_TEX0.TBP0 <= bp && bp <= t->m_end_block && (half_buffer_match || t->Inside(bp, TEX0.TBW, TEX0.PSM, GSVector4i::loadh(size))))
if (t->m_TEX0.TBP0 <= bp_adj && bp_adj <= t->UnwrappedEndBlock() && (half_buffer_match || t->Inside(bp_adj, TEX0.TBW, TEX0.PSM, GSVector4i::loadh(size))))
{
if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))
continue;
@@ -4306,23 +4310,27 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo
}
}
}*/
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw)
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw, u32 fb_mask, bool ignore_exact)
{
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24);
for (int type = 0; type < 2; type++)
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24) || (fb_mask & 0xFF000000);
for (int type = 0; type < (ignore_exact ? 1 : 2); type++)
{
auto& list = m_dst[type];
for (auto i = list.begin(); i != list.end();)
{
Target* const t = *i;
if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp))
if ((ignore_exact && start_bp == t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp)))
{
++i;
continue;
}
const bool compatible_fmt = GSUtil::HasCompatibleBits(t->m_TEX0.PSM, write_psm);
const bool compatible_width = std::max(t->m_TEX0.TBW, 1U) == std::max(write_bw, 1U);
// If not fully contained but they are aligned and/or clean, just dirty the area.
if (type != DepthStencil && start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp))
if ((type != DepthStencil || !compatible_fmt || !compatible_width) && start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp))
{
const u32 offset = (std::abs(static_cast<int>(start_bp - t->m_TEX0.TBP0)) >> 5) % std::max(1U, t->m_TEX0.TBW);
const GSVector4i dirty_rect = t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(t->m_valid);
@@ -4338,7 +4346,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm);
mask._u32 = GSUtil::GetChannelMask(write_psm, fb_mask);
AddDirtyRectTarget(t, invalidate_r, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
}
@@ -4368,7 +4376,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
t->m_valid_alpha_low &= preserve_alpha;
t->m_valid_alpha_high &= preserve_alpha;
t->m_valid_rgb = false;
t->m_valid_rgb &= (fb_mask & 0x00FFFFFF) != 0;
// Don't keep partial depth buffers around.
if ((!t->m_valid_alpha_low && !t->m_valid_alpha_high && !t->m_valid_rgb) || type == DepthStencil)
@@ -4390,6 +4398,16 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
delete t;
continue;
}
else if (ignore_exact && GSUtil::HasCompatibleBits(t->m_TEX0.PSM, write_psm))
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm, fb_mask);
AddDirtyRectTarget(t, t->m_valid, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
t->m_valid_rgb |= !!(mask._u32 & 0x7);
t->m_valid_alpha_low |= mask.c.a;
t->m_valid_alpha_high |= mask.c.a;
}
GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, GSUtil::GetPSMName(t->m_TEX0.PSM));
++i;

View File

@@ -32,8 +32,18 @@ public:
/// Tests whether the inclusive block range [a_bp, a_bp_end] overlaps [b_bp, b_bp_end].
///
/// Range A may be given "unwrapped", i.e. with a_bp_end beyond GS_MAX_BLOCKS when it runs off
/// the end of GS memory and continues at block 0. In that case a range B that lies entirely
/// below a_bp is shifted up by GS_MAX_BLOCKS so it is compared against A's wrapped tail.
/// Only A is treated as possibly wrapping; B is never unwrapped on its own.
///
/// @param a_bp      First block of range A.
/// @param a_bp_end  Last block of range A (may exceed GS_MAX_BLOCKS when A wraps).
/// @param b_bp      First block of range B.
/// @param b_bp_end  Last block of range B.
/// @return true if both ranges are well-formed (start <= end) and they share at least one block.
constexpr static bool CheckOverlap(const u32 a_bp, const u32 a_bp_end, const u32 b_bp, const u32 b_bp_end) noexcept
{
	u32 b_bp_start_synced = b_bp;
	u32 b_bp_end_synced = b_bp_end;

	// Check for wrapping: A extends past the end of memory and B sits wholly before A's start,
	// so B can only intersect the part of A that wrapped around.
	if (a_bp_end > GS_MAX_BLOCKS && b_bp_end < a_bp)
	{
		b_bp_start_synced += GS_MAX_BLOCKS;
		b_bp_end_synced += GS_MAX_BLOCKS;
	}

	// Single definition of each flag, using the wrap-adjusted B range.
	const bool valid = a_bp <= a_bp_end && b_bp_start_synced <= b_bp_end_synced;
	const bool overlap = a_bp <= b_bp_end_synced && a_bp_end >= b_bp_start_synced;
	return valid && overlap;
}
@@ -522,7 +532,7 @@ public:
bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits<u32>::max(), bool move_front = true);
bool Has32BitTarget(u32 bp);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1, u32 fb_mask = 0x00000000, bool ignore_exact = false);
void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);

View File

@@ -481,72 +481,26 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, queue_family_properties.data());
DevCon.WriteLn("%u vulkan queue families", queue_family_count);
std::vector<uint32_t> queue_family_users(queue_family_count, 0);
// Find graphics and present queues.
m_graphics_queue_family_index = queue_family_count;
m_present_queue_family_index = queue_family_count;
u32 present_queue_index = 0;
m_spin_queue_family_index = queue_family_count;
u32 spin_queue_index = 0;
// Graphics Queue
for (uint32_t i = 0; i < queue_family_count; i++)
{
if (queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)
VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT;
if (graphics_supported)
{
m_graphics_queue_family_index = i;
queue_family_users[i]++;
break;
// Quit now, no need for a present queue.
if (!surface)
{
break;
}
}
}
// Spinwait Queue
for (uint32_t i = 0; i < queue_family_count; i++)
{
if (queue_family_properties[i].queueCount == queue_family_users[i])
continue;
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
continue;
if (queue_family_properties[i].timestampValidBits == 0)
continue; // We need timing
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
if (surface)
{
m_spin_queue_family_index = i;
break;
}
else if (m_spin_queue_family_index == queue_family_count)
m_spin_queue_family_index = i;
}
if (m_spin_queue_family_index != queue_family_count)
{
spin_queue_index = queue_family_users[m_spin_queue_family_index];
queue_family_users[m_spin_queue_family_index]++;
m_spin_queue_is_graphics_queue = false;
}
else
{
// No spare queue? Try the graphics queue.
if ((queue_family_properties[m_graphics_queue_family_index].queueFlags & VK_QUEUE_COMPUTE_BIT) &&
(queue_family_properties[m_graphics_queue_family_index].timestampValidBits != 0))
{
m_spin_queue_family_index = m_graphics_queue_family_index;
spin_queue_index = 0;
m_spin_queue_is_graphics_queue = true;
}
else
m_spin_queue_is_graphics_queue = false;
}
// Present Queue
if (surface)
{
for (uint32_t i = 0; i < queue_family_count; i++)
{
if (queue_family_properties[i].queueCount == queue_family_users[i])
continue;
VkBool32 present_supported;
VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported);
if (res != VK_SUCCESS)
@@ -555,48 +509,35 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
return false;
}
if (!present_supported)
continue;
// Prefer async compute queue
if ((queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT) &&
!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
{
m_present_queue_family_index = i;
break;
}
else if (m_present_queue_family_index == queue_family_count)
m_present_queue_family_index = i;
}
if (m_present_queue_family_index != queue_family_count)
{
present_queue_index = queue_family_users[m_present_queue_family_index];
queue_family_users[m_present_queue_family_index]++;
}
else
{
// No spare queue? Try the graphics queue.
VkBool32 present_supported;
VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, m_graphics_queue_family_index, surface, &present_supported);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ");
return false;
}
if (present_supported)
{
m_present_queue_family_index = m_graphics_queue_family_index;
present_queue_index = 0;
m_present_queue_family_index = i;
}
// Prefer one queue family index that does both graphics and present.
if (graphics_supported && present_supported)
{
break;
}
}
}
// Swap spin and present to simplify queue priorities logic.
if (!m_spin_queue_is_graphics_queue && m_present_queue_family_index == m_spin_queue_family_index)
std::swap(spin_queue_index, present_queue_index);
for (uint32_t i = 0; i < queue_family_count; i++)
{
// Pick a queue for spinning
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
continue; // We need compute
if (queue_family_properties[i].timestampValidBits == 0)
continue; // We need timing
const bool queue_is_used = i == m_graphics_queue_family_index || i == m_present_queue_family_index;
if (queue_is_used && m_spin_queue_family_index != queue_family_count)
continue; // Found a non-graphics queue to use
spin_queue_index = 0;
m_spin_queue_family_index = i;
if (queue_is_used && queue_family_properties[i].queueCount > 1)
spin_queue_index = 1;
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
break; // Async compute queue, definitely pick this one
}
if (m_graphics_queue_family_index == queue_family_count)
{
Console.Error("VK: Failed to find an acceptable graphics queue.");
@@ -614,16 +555,14 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
device_info.flags = 0;
device_info.queueCreateInfoCount = 0;
// Low priority for the spin queue
static constexpr float queue_priorities[] = {1.0f, 1.0f, 0.0f};
static constexpr float queue_priorities[] = {1.0f, 0.0f}; // Low priority for the spin queue
std::array<VkDeviceQueueCreateInfo, 3> queue_infos;
VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++];
graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
graphics_queue_info.pNext = nullptr;
graphics_queue_info.flags = 0;
graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index;
graphics_queue_info.queueCount = queue_family_users[m_graphics_queue_family_index];
graphics_queue_info.queueCount = 1;
graphics_queue_info.pQueuePriorities = queue_priorities;
if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index)
@@ -633,19 +572,19 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
present_queue_info.pNext = nullptr;
present_queue_info.flags = 0;
present_queue_info.queueFamilyIndex = m_present_queue_family_index;
present_queue_info.queueCount = queue_family_users[m_present_queue_family_index];
present_queue_info.queueCount = 1;
present_queue_info.pQueuePriorities = queue_priorities;
}
if (m_spin_queue_family_index == m_graphics_queue_family_index)
{
if (spin_queue_index == 1)
graphics_queue_info.pQueuePriorities = queue_priorities + 1;
if (spin_queue_index != 0)
graphics_queue_info.queueCount = 2;
}
else if (m_spin_queue_family_index == m_present_queue_family_index)
{
if (spin_queue_index == 1)
queue_infos[1].pQueuePriorities = queue_priorities + 1;
if (spin_queue_index != 0)
queue_infos[1].queueCount = 2; // present queue
}
else if (m_spin_queue_family_index != queue_family_count)
{
@@ -655,7 +594,7 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
spin_queue_info.flags = 0;
spin_queue_info.queueFamilyIndex = m_spin_queue_family_index;
spin_queue_info.queueCount = 1;
spin_queue_info.pQueuePriorities = queue_priorities + 2;
spin_queue_info.pQueuePriorities = queue_priorities + 1;
}
device_info.pQueueCreateInfos = queue_infos.data();
@@ -744,11 +683,13 @@ bool GSDeviceVK::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer
vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue);
if (surface)
{
vkGetDeviceQueue(m_device, m_present_queue_family_index, present_queue_index, &m_present_queue);
vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue);
}
m_spinning_supported = m_spin_queue_family_index != queue_family_count &&
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
m_device_properties.limits.timestampPeriod > 0;
m_spin_queue_is_graphics_queue =
m_spin_queue_family_index == m_graphics_queue_family_index && spin_queue_index == 0;
m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 &&
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
@@ -1354,7 +1295,7 @@ void GSDeviceVK::SubmitCommandBuffer(VKSwapChain* present_swap_chain)
present_swap_chain->ResetImageAcquireResult();
const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.