diff --git a/.clang-format b/.clang-format index 3b7f8f2de..10ff7f094 100644 --- a/.clang-format +++ b/.clang-format @@ -10,7 +10,114 @@ AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Inline +AllowShortFunctionsOnASingleLine: InlineOnly +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false 
+JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 2 +UseTab: Never +... +--- +Language: ObjC +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: InlineOnly AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None diff --git a/CMakeLists.txt b/CMakeLists.txt index d8e9893d8..7fd78cac1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ elseif(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") endif() # Set minimum OS version for macOS. 10.14 should work. -set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14.0" CACHE STRING "") +set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "") # Global options. 
if(NOT ANDROID) @@ -46,13 +46,7 @@ endif() if(SUPPORTS_WAYLAND) option(USE_WAYLAND "Support Wayland window system" ON) endif() -if((LINUX OR FREEBSD) OR ANDROID) - option(USE_EGL "Support EGL OpenGL context creation" ON) -endif() if((LINUX OR FREEBSD) AND NOT ANDROID) - option(USE_DRMKMS "Support DRM/KMS OpenGL contexts" OFF) - option(USE_FBDEV "Support FBDev OpenGL contexts" OFF) - option(USE_EVDEV "Support EVDev controller interface" OFF) option(USE_DBUS "Enable DBus support for screensaver inhibiting" ON) endif() @@ -122,21 +116,6 @@ endif() if(USE_WAYLAND) message(STATUS "Wayland support enabled") endif() -if(USE_DRMKMS AND USE_FBDEV) - message(FATAL_ERROR "Only one of DRM/KMS and FBDev can be enabled") -endif() -if(USE_DRMKMS) - find_package(GBM REQUIRED) - find_package(Libdrm REQUIRED) - message(STATUS "DRM/KMS support enabled") -endif() -if(USE_FBDEV) - message(STATUS "FBDev Support enabled") -endif() -if(USE_EVDEV) - message(STATUS "EVDev Support enabled") - find_package(LIBEVDEV REQUIRED) -endif() if(ENABLE_CHEEVOS) message(STATUS "RetroAchievements support enabled") endif() diff --git a/CMakeModules/FindGBM.cmake b/CMakeModules/FindGBM.cmake deleted file mode 100644 index 1447b5edd..000000000 --- a/CMakeModules/FindGBM.cmake +++ /dev/null @@ -1,70 +0,0 @@ -# https://fossies.org/linux/misc/xbmc-18.9-Leia.tar.gz/xbmc-18.9-Leia/cmake/modules/FindGBM.cmake?m=t - -# FindGBM -# ---------- -# Finds the GBM library -# -# This will define the following variables:: -# -# GBM_FOUND - system has GBM -# GBM_INCLUDE_DIRS - the GBM include directory -# GBM_LIBRARIES - the GBM libraries -# GBM_DEFINITIONS - the GBM definitions -# -# and the following imported targets:: -# -# GBM::GBM - The GBM library - -if(PKG_CONFIG_FOUND) - pkg_check_modules(PC_GBM gbm QUIET) -endif() - -find_path(GBM_INCLUDE_DIR NAMES gbm.h - PATHS ${PC_GBM_INCLUDEDIR}) -find_library(GBM_LIBRARY NAMES gbm - PATHS ${PC_GBM_LIBDIR}) - -set(GBM_VERSION ${PC_GBM_VERSION}) - 
-include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GBM - REQUIRED_VARS GBM_LIBRARY GBM_INCLUDE_DIR - VERSION_VAR GBM_VERSION) - -include(CheckCSourceCompiles) -set(CMAKE_REQUIRED_LIBRARIES ${GBM_LIBRARY}) -check_c_source_compiles("#include - - int main() - { - gbm_bo_map(NULL, 0, 0, 0, 0, GBM_BO_TRANSFER_WRITE, NULL, NULL); - } - " GBM_HAS_BO_MAP) - -check_c_source_compiles("#include - - int main() - { - gbm_surface_create_with_modifiers(NULL, 0, 0, 0, NULL, 0); - } - " GBM_HAS_MODIFIERS) - -if(GBM_FOUND) - set(GBM_LIBRARIES ${GBM_LIBRARY}) - set(GBM_INCLUDE_DIRS ${GBM_INCLUDE_DIR}) - set(GBM_DEFINITIONS -DHAVE_GBM=1) - if(GBM_HAS_BO_MAP) - list(APPEND GBM_DEFINITIONS -DHAS_GBM_BO_MAP=1) - endif() - if(GBM_HAS_MODIFIERS) - list(APPEND GBM_DEFINITIONS -DHAS_GBM_MODIFIERS=1) - endif() - if(NOT TARGET GBM::GBM) - add_library(GBM::GBM UNKNOWN IMPORTED) - set_target_properties(GBM::GBM PROPERTIES - IMPORTED_LOCATION "${GBM_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${GBM_INCLUDE_DIR}") - endif() -endif() - -mark_as_advanced(GBM_INCLUDE_DIR GBM_LIBRARY) diff --git a/CMakeModules/FindLIBEVDEV.cmake b/CMakeModules/FindLIBEVDEV.cmake deleted file mode 100644 index b560a27f6..000000000 --- a/CMakeModules/FindLIBEVDEV.cmake +++ /dev/null @@ -1,34 +0,0 @@ -# - Try to find libevdev -# Once done this will define -# LIBEVDEV_FOUND - System has libevdev -# LIBEVDEV_INCLUDE_DIRS - The libevdev include directories -# LIBEVDEV_LIBRARIES - The libraries needed to use libevdev - -find_package(PkgConfig) -pkg_check_modules(PC_LIBEVDEV QUIET libevdev) - -FIND_PATH( - LIBEVDEV_INCLUDE_DIR libevdev/libevdev.h - HINTS ${PC_LIBEVDEV_INCLUDEDIR} ${PC_LIBEVDEV_INCLUDE_DIRS} - /usr/include - /usr/local/include - /usr/local/include/libevdev-1.0 - ${LIBEVDEV_PATH_INCLUDES} -) - -FIND_LIBRARY( - LIBEVDEV_LIBRARY - NAMES evdev libevdev - HINTS ${PC_LIBEVDEV_LIBDIR} ${PC_LIBEVDEV_LIBRARY_DIRS} - PATHS ${ADDITIONAL_LIBRARY_PATHS} - ${LIBEVDEV_PATH_LIB} -) - 
-set(LIBEVDEV_LIBRARIES ${LIBEVDEV_LIBRARY} ) -set(LIBEVDEV_INCLUDE_DIRS ${LIBEVDEV_INCLUDE_DIR} ) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(LIBEVDEV DEFAULT_MSG - LIBEVDEV_LIBRARY LIBEVDEV_INCLUDE_DIR) - -mark_as_advanced(LIBEVDEV_INCLUDE_DIR LIBEVDEV_LIBRARY ) diff --git a/CMakeModules/FindLibdrm.cmake b/CMakeModules/FindLibdrm.cmake deleted file mode 100644 index 2df135812..000000000 --- a/CMakeModules/FindLibdrm.cmake +++ /dev/null @@ -1,107 +0,0 @@ -# https://raw.githubusercontent.com/KDE/kwin/master/cmake/modules/FindLibdrm.cmake - -#.rst: -# FindLibdrm -# ------- -# -# Try to find libdrm on a Unix system. -# -# This will define the following variables: -# -# ``Libdrm_FOUND`` -# True if (the requested version of) libdrm is available -# ``Libdrm_VERSION`` -# The version of libdrm -# ``Libdrm_LIBRARIES`` -# This can be passed to target_link_libraries() instead of the ``Libdrm::Libdrm`` -# target -# ``Libdrm_INCLUDE_DIRS`` -# This should be passed to target_include_directories() if the target is not -# used for linking -# ``Libdrm_DEFINITIONS`` -# This should be passed to target_compile_options() if the target is not -# used for linking -# -# If ``Libdrm_FOUND`` is TRUE, it will also define the following imported target: -# -# ``Libdrm::Libdrm`` -# The libdrm library -# -# In general we recommend using the imported target, as it is easier to use. -# Bear in mind, however, that if the target is in the link interface of an -# exported library, it must be made available by the package config file. 
- -#============================================================================= -# SPDX-FileCopyrightText: 2014 Alex Merry -# SPDX-FileCopyrightText: 2014 Martin Gräßlin -# -# SPDX-License-Identifier: BSD-3-Clause -#============================================================================= - -if(CMAKE_VERSION VERSION_LESS 2.8.12) - message(FATAL_ERROR "CMake 2.8.12 is required by FindLibdrm.cmake") -endif() -if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.12) - message(AUTHOR_WARNING "Your project should require at least CMake 2.8.12 to use FindLibdrm.cmake") -endif() - -if(NOT WIN32) - # Use pkg-config to get the directories and then use these values - # in the FIND_PATH() and FIND_LIBRARY() calls - find_package(PkgConfig) - pkg_check_modules(PKG_Libdrm QUIET libdrm) - - set(Libdrm_DEFINITIONS ${PKG_Libdrm_CFLAGS_OTHER}) - set(Libdrm_VERSION ${PKG_Libdrm_VERSION}) - - find_path(Libdrm_INCLUDE_DIR - NAMES - xf86drm.h - HINTS - ${PKG_Libdrm_INCLUDE_DIRS} - ) - find_library(Libdrm_LIBRARY - NAMES - drm - HINTS - ${PKG_Libdrm_LIBRARY_DIRS} - ) - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(Libdrm - FOUND_VAR - Libdrm_FOUND - REQUIRED_VARS - Libdrm_LIBRARY - Libdrm_INCLUDE_DIR - VERSION_VAR - Libdrm_VERSION - ) - - if(Libdrm_FOUND AND NOT TARGET Libdrm::Libdrm) - add_library(Libdrm::Libdrm UNKNOWN IMPORTED) - set_target_properties(Libdrm::Libdrm PROPERTIES - IMPORTED_LOCATION "${Libdrm_LIBRARY}" - INTERFACE_COMPILE_OPTIONS "${Libdrm_DEFINITIONS}" - INTERFACE_INCLUDE_DIRECTORIES "${Libdrm_INCLUDE_DIR}" - INTERFACE_INCLUDE_DIRECTORIES "${Libdrm_INCLUDE_DIR}/libdrm" - ) - endif() - - mark_as_advanced(Libdrm_LIBRARY Libdrm_INCLUDE_DIR) - - # compatibility variables - set(Libdrm_LIBRARIES ${Libdrm_LIBRARY}) - set(Libdrm_INCLUDE_DIRS ${Libdrm_INCLUDE_DIR} "${Libdrm_INCLUDE_DIR}/libdrm") - set(Libdrm_VERSION_STRING ${Libdrm_VERSION}) - -else() - message(STATUS "FindLibdrm.cmake cannot find libdrm on Windows systems.") - set(Libdrm_FOUND 
FALSE) -endif() - -include(FeatureSummary) -set_package_properties(Libdrm PROPERTIES - URL "https://wiki.freedesktop.org/dri/" - DESCRIPTION "Userspace interface to kernel DRM services." -) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index ba996adee..defac0066 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -17,8 +17,6 @@ add_library(common fifo_queue.h file_system.cpp file_system.h - gpu_texture.cpp - gpu_texture.h image.cpp image.h hash_combine.h @@ -57,8 +55,6 @@ add_library(common timer.cpp timer.h types.h - window_info.cpp - window_info.h ) target_include_directories(common PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") @@ -68,28 +64,6 @@ target_link_libraries(common PRIVATE stb libchdr zlib minizip Zstd::Zstd "${CMAK if(WIN32) target_sources(common PRIVATE - d3d12/context.cpp - d3d12/context.h - d3d12/descriptor_heap_manager.cpp - d3d12/descriptor_heap_manager.h - d3d12/shader_cache.cpp - d3d12/shader_cache.h - d3d12/staging_texture.cpp - d3d12/staging_texture.h - d3d12/stream_buffer.cpp - d3d12/stream_buffer.h - d3d12/texture.cpp - d3d12/texture.h - d3d12/util.cpp - d3d12/util.h - d3d11/shader_cache.cpp - d3d11/shader_cache.h - d3d11/shader_compiler.cpp - d3d11/shader_compiler.h - d3d11/stream_buffer.cpp - d3d11/stream_buffer.h - d3d11/texture.cpp - d3d11/texture.h http_downloader_winhttp.cpp http_downloader_winhttp.h thirdparty/StackWalker.cpp @@ -123,147 +97,6 @@ if(ANDROID) target_link_libraries(common PRIVATE log) endif() -if(USE_X11) - target_sources(common PRIVATE - gl/x11_window.cpp - gl/x11_window.h - ) - target_compile_definitions(common PRIVATE "-DUSE_X11=1") - target_include_directories(common PRIVATE "${X11_INCLUDE_DIR}" "${X11_Xrandr_INCLUDE_PATH}") - target_link_libraries(common PRIVATE "${X11_LIBRARIES}" "${X11_Xrandr_LIB}") -endif() - -if(USE_WAYLAND) - target_compile_definitions(common PRIVATE "-DUSE_WAYLAND=1") -elseif(SUPPORTS_WAYLAND) - message(WARNING "Wayland support for renderers is 
disabled.\nDuckStation will FAIL to start on Wayland.") -endif() - -if(USE_DRMKMS) - target_sources(common PRIVATE - drm_display.cpp - drm_display.h - ) - target_link_libraries(common PUBLIC Libdrm::Libdrm) -endif() - -if(ENABLE_OPENGL) - target_sources(common PRIVATE - gl/context.cpp - gl/context.h - gl/program.cpp - gl/program.h - gl/shader_cache.cpp - gl/shader_cache.h - gl/stream_buffer.cpp - gl/stream_buffer.h - gl/texture.cpp - gl/texture.h - ) - target_compile_definitions(common PUBLIC "WITH_OPENGL=1") - target_link_libraries(common PRIVATE glad) - - if(WIN32) - target_sources(common PRIVATE - gl/context_wgl.cpp - gl/context_wgl.h - ) - endif() - - if(USE_EGL) - target_sources(common PRIVATE - gl/context_egl.cpp - gl/context_egl.h - ) - target_compile_definitions(common PRIVATE "-DUSE_EGL=1") - - if(USE_X11) - target_sources(common PRIVATE - gl/context_egl_x11.cpp - gl/context_egl_x11.h - ) - - # We set EGL_NO_X11 because otherwise X comes in with its macros and breaks - # a bunch of files from compiling, if we include the EGL headers. This just - # makes the data types opaque, we can still use it with X11 if needed. 
- target_compile_definitions(common PRIVATE "-DEGL_NO_X11=1") - endif() - if(ANDROID AND USE_EGL) - target_sources(common PRIVATE - gl/context_egl_android.cpp - gl/context_egl_android.h - ) - endif() - if(USE_DRMKMS) - target_compile_definitions(common PRIVATE "-DUSE_GBM=1") - target_sources(common PRIVATE - gl/context_egl_gbm.cpp - gl/context_egl_gbm.h - ) - target_link_libraries(common PUBLIC GBM::GBM) - endif() - if(USE_FBDEV) - target_compile_definitions(common PRIVATE "-DUSE_FBDEV=1") - target_sources(common PRIVATE - gl/context_egl_fbdev.cpp - gl/context_egl_fbdev.h - ) - endif() - endif() - - if(USE_X11) - target_sources(common PRIVATE - gl/context_glx.cpp - gl/context_glx.h - ) - target_compile_definitions(common PRIVATE "-DUSE_GLX=1") - endif() - - if(USE_WAYLAND) - target_sources(common PRIVATE - gl/context_egl_wayland.cpp - gl/context_egl_wayland.h - ) - endif() - - if(APPLE) - target_sources(common PRIVATE - gl/context_agl.mm - gl/context_agl.h - ) - endif() -endif() - -if(ENABLE_VULKAN) - target_sources(common PRIVATE - vulkan/builders.cpp - vulkan/builders.h - vulkan/context.cpp - vulkan/context.h - vulkan/loader.h - vulkan/loader.cpp - vulkan/shader_cache.cpp - vulkan/shader_cache.h - vulkan/shader_compiler.cpp - vulkan/shader_compiler.h - vulkan/stream_buffer.cpp - vulkan/stream_buffer.h - vulkan/swap_chain.cpp - vulkan/swap_chain.h - vulkan/texture.cpp - vulkan/texture.h - vulkan/util.cpp - vulkan/util.h - ) - target_compile_definitions(common PUBLIC "WITH_VULKAN=1") - target_link_libraries(common PRIVATE glslang) - - if(APPLE) - # Needed for Vulkan Swap Chain. 
- target_link_libraries(common PRIVATE "objc") - endif() -endif() - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") # We need -lrt for shm_unlink target_link_libraries(common PRIVATE rt) diff --git a/src/common/align.h b/src/common/align.h index 0aa5faf4b..1f72e7dde 100644 --- a/src/common/align.h +++ b/src/common/align.h @@ -3,6 +3,14 @@ #pragma once +#include "types.h" + +#include + +#ifdef _MSC_VER +#include +#endif + namespace Common { template constexpr bool IsAligned(T value, unsigned int alignment) @@ -52,4 +60,30 @@ constexpr T PreviousPow2(T value) value |= (value >> 16); return value - (value >> 1); } + +ALWAYS_INLINE static void* AlignedMalloc(size_t size, size_t alignment) +{ +#ifdef _MSC_VER + return _aligned_malloc(size, alignment); +#else + // Unaligned sizes are slow on macOS. +#ifdef __APPLE__ + if (IsPow2(alignment)) + size = (size + alignment - 1) & ~(alignment - 1); +#endif + void* ret = nullptr; + posix_memalign(&ret, alignment, size); + return ret; +#endif +} + +ALWAYS_INLINE static void AlignedFree(void* ptr) +{ +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + } // namespace Common diff --git a/src/common/common.props b/src/common/common.props index f4060baf8..df0058a9c 100644 --- a/src/common/common.props +++ b/src/common/common.props @@ -2,15 +2,13 @@ - WITH_OPENGL=1;WITH_VULKAN=1;%(PreprocessorDefinitions) - $(SolutionDir)dep\glad\include;$(SolutionDir)dep\vulkan\include;%(AdditionalIncludeDirectories) - $(SolutionDir)src;%(AdditionalIncludeDirectories);$(SolutionDir)dep\gsl\include;$(SolutionDir)dep\fast_float\include;$(SolutionDir)dep\fmt\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\glslang;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include + $(SolutionDir)src;%(AdditionalIncludeDirectories);$(SolutionDir)dep\gsl\include;$(SolutionDir)dep\fast_float\include;$(SolutionDir)dep\fmt\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\stb\include - 
d3dcompiler.lib;d3d11.lib;%(AdditionalDependencies) + %(AdditionalDependencies);Comctl32.lib;winhttp.lib diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj index 251d8e3c5..b9b63fba3 100644 --- a/src/common/common.vcxproj +++ b/src/common/common.vcxproj @@ -9,45 +9,12 @@ - - - - - - - - - - - - - true - - - true - - - true - - - true - - - true - - - true - - - true - - @@ -74,77 +41,16 @@ - - true - - - true - - - true - - - true - - - true - - - true - - - true - - - true - - - true - - - true - - - - - - - - - - - - - - - true - - - true - - - true - - - true - - - true - - - true - - @@ -160,35 +66,7 @@ - - true - - - true - - - true - - - true - - - true - - - true - - - true - - - true - - - true - - @@ -204,20 +82,11 @@ _M_X86_32;%(PreprocessorDefinitions) _M_X86_64;%(PreprocessorDefinitions) - - true - {8be398e6-b882-4248-9065-fecc8728e038} - - {43540154-9e1e-409c-834f-b84be5621388} - - - {7f909e29-4808-4bd9-a60c-56c51a3aaec2} - {8bda439c-6358-45fb-9994-2ff083babe06} @@ -251,4 +120,4 @@ - \ No newline at end of file + diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters index 35ea858b7..76ed42311 100644 --- a/src/common/common.vcxproj.filters +++ b/src/common/common.vcxproj.filters @@ -5,24 +5,6 @@ - - gl - - - gl - - - gl - - - d3d11 - - - d3d11 - - - d3d11 - @@ -33,47 +15,10 @@ - - d3d11 - - - gl - - - gl - - - gl - - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - @@ -86,40 +31,10 @@ - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - vulkan - - - vulkan - - - gl - @@ -128,28 +43,9 @@ - - - gl - - - gl - - - gl - - - d3d11 - - - d3d11 - - - d3d11 - @@ -158,43 +54,7 @@ - - d3d11 - - - gl - - - gl - - - gl - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - - - vulkan - @@ -203,64 +63,21 @@ - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - d3d12 - - - 
vulkan - - - - {52487c57-753d-4888-ba26-ed63ab51a234} - - - {30251086-81f3-44f5-add4-7ff9a24098ab} - - - {642ff5eb-af39-4aab-a42f-6eb8188a11d7} - {fd4150b0-6f82-4251-ab23-34c25fbc5b5e} - - {358e11c4-34af-4169-9a66-ec66342a6a2f} - - - - - vulkan - @@ -268,4 +85,4 @@ - \ No newline at end of file + diff --git a/src/common/d3d11/shader_cache.cpp b/src/common/d3d11/shader_cache.cpp deleted file mode 100644 index b1c9b5c0e..000000000 --- a/src/common/d3d11/shader_cache.cpp +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "shader_cache.h" -#include "../file_system.h" -#include "../log.h" -#include "../md5_digest.h" -#include "shader_compiler.h" -#include -Log_SetChannel(D3D11::ShaderCache); - -namespace D3D11 { - -#pragma pack(push, 1) -struct CacheIndexEntry -{ - u64 source_hash_low; - u64 source_hash_high; - u32 source_length; - u32 shader_type; - u32 file_offset; - u32 blob_size; -}; -#pragma pack(pop) - -ShaderCache::ShaderCache() = default; - -ShaderCache::~ShaderCache() -{ - if (m_index_file) - std::fclose(m_index_file); - if (m_blob_file) - std::fclose(m_blob_file); -} - -bool ShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const -{ - return (source_hash_low == key.source_hash_low && source_hash_high == key.source_hash_high && - source_length == key.source_length && shader_type == key.shader_type); -} - -bool ShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const -{ - return (source_hash_low != key.source_hash_low || source_hash_high != key.source_hash_high || - source_length != key.source_length || shader_type != key.shader_type); -} - -void ShaderCache::Open(std::string_view base_path, D3D_FEATURE_LEVEL feature_level, u32 version, bool debug) -{ - m_feature_level = feature_level; - m_version = version; - m_debug = debug; - - if (!base_path.empty()) - { - const std::string base_filename = GetCacheBaseFileName(base_path, 
feature_level, debug); - const std::string index_filename = base_filename + ".idx"; - const std::string blob_filename = base_filename + ".bin"; - - if (!ReadExisting(index_filename, blob_filename)) - CreateNew(index_filename, blob_filename); - } -} - -bool ShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename) -{ - if (FileSystem::FileExists(index_filename.c_str())) - { - Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); - FileSystem::DeleteFile(index_filename.c_str()); - } - if (FileSystem::FileExists(blob_filename.c_str())) - { - Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); - FileSystem::DeleteFile(blob_filename.c_str()); - } - - m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); - if (!m_index_file) - { - Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str()); - return false; - } - - const u32 index_version = FILE_VERSION; - if (std::fwrite(&index_version, sizeof(index_version), 1, m_index_file) != 1 || - std::fwrite(&m_version, sizeof(m_version), 1, m_index_file) != 1) - { - Log_ErrorPrintf("Failed to write version to index file '%s'", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b"); - if (!m_blob_file) - { - Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - return true; -} - -bool ShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename) -{ - m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b"); - if (!m_index_file) - return false; - - u32 file_version = 0; - u32 data_version = 0; - if (std::fread(&file_version, sizeof(file_version), 
1, m_index_file) != 1 || file_version != FILE_VERSION || - std::fread(&data_version, sizeof(data_version), 1, m_index_file) != 1 || data_version != m_version) - { - Log_ErrorPrintf("Bad file/data version in '%s'", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b"); - if (!m_blob_file) - { - Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - std::fseek(m_blob_file, 0, SEEK_END); - const u32 blob_file_size = static_cast(std::ftell(m_blob_file)); - - for (;;) - { - CacheIndexEntry entry; - if (std::fread(&entry, sizeof(entry), 1, m_index_file) != 1 || - (entry.file_offset + entry.blob_size) > blob_file_size) - { - if (std::feof(m_index_file)) - break; - - Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str()); - m_index.clear(); - std::fclose(m_blob_file); - m_blob_file = nullptr; - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - const CacheIndexKey key{entry.source_hash_low, entry.source_hash_high, entry.source_length, - static_cast(entry.shader_type)}; - const CacheIndexData data{entry.file_offset, entry.blob_size}; - m_index.emplace(key, data); - } - - // ensure we don't write before seeking - std::fseek(m_index_file, 0, SEEK_END); - - Log_InfoPrintf("Read %zu entries from '%s'", m_index.size(), index_filename.c_str()); - return true; -} - -std::string ShaderCache::GetCacheBaseFileName(const std::string_view& base_path, D3D_FEATURE_LEVEL feature_level, - bool debug) -{ - std::string base_filename(base_path); - base_filename += FS_OSPATH_SEPARATOR_STR "d3d_shaders_"; - - switch (feature_level) - { - case D3D_FEATURE_LEVEL_10_0: - base_filename += "sm40"; - break; - case D3D_FEATURE_LEVEL_10_1: - base_filename += "sm41"; - break; - case D3D_FEATURE_LEVEL_11_0: - base_filename += "sm50"; - 
break; - default: - base_filename += "unk"; - break; - } - - if (debug) - base_filename += "_debug"; - - return base_filename; -} - -ShaderCache::CacheIndexKey ShaderCache::GetCacheKey(ShaderCompiler::Type type, const std::string_view& shader_code) -{ - union - { - struct - { - u64 hash_low; - u64 hash_high; - }; - u8 hash[16]; - }; - - MD5Digest digest; - digest.Update(shader_code.data(), static_cast(shader_code.length())); - digest.Final(hash); - - return CacheIndexKey{hash_low, hash_high, static_cast(shader_code.length()), type}; -} - -ShaderCache::ComPtr ShaderCache::GetShaderBlob(ShaderCompiler::Type type, std::string_view shader_code) -{ - const auto key = GetCacheKey(type, shader_code); - auto iter = m_index.find(key); - if (iter == m_index.end()) - return CompileAndAddShaderBlob(key, shader_code); - - ComPtr blob; - HRESULT hr = D3DCreateBlob(iter->second.blob_size, blob.GetAddressOf()); - if (FAILED(hr) || std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 || - std::fread(blob->GetBufferPointer(), 1, iter->second.blob_size, m_blob_file) != iter->second.blob_size) - { - Log_ErrorPrintf("Read blob from file failed"); - return {}; - } - - return blob; -} - -ShaderCache::ComPtr ShaderCache::GetVertexShader(ID3D11Device* device, std::string_view shader_code) -{ - ComPtr blob = GetShaderBlob(ShaderCompiler::Type::Vertex, std::move(shader_code)); - if (!blob) - return {}; - - return D3D11::ShaderCompiler::CreateVertexShader(device, blob.Get()); -} - -ShaderCache::ComPtr ShaderCache::GetGeometryShader(ID3D11Device* device, - std::string_view shader_code) -{ - ComPtr blob = GetShaderBlob(ShaderCompiler::Type::Geometry, std::move(shader_code)); - if (!blob) - return {}; - - return D3D11::ShaderCompiler::CreateGeometryShader(device, blob.Get()); -} - -ShaderCache::ComPtr ShaderCache::GetPixelShader(ID3D11Device* device, std::string_view shader_code) -{ - ComPtr blob = GetShaderBlob(ShaderCompiler::Type::Pixel, std::move(shader_code)); - if (!blob) - 
return {}; - - return D3D11::ShaderCompiler::CreatePixelShader(device, blob.Get()); -} - -ShaderCache::ComPtr ShaderCache::GetComputeShader(ID3D11Device* device, - std::string_view shader_code) -{ - ComPtr blob = GetShaderBlob(ShaderCompiler::Type::Compute, std::move(shader_code)); - if (!blob) - return {}; - - return D3D11::ShaderCompiler::CreateComputeShader(device, blob.Get()); -} - -ShaderCache::ComPtr ShaderCache::CompileAndAddShaderBlob(const CacheIndexKey& key, - std::string_view shader_code) -{ - ComPtr blob = ShaderCompiler::CompileShader(key.shader_type, m_feature_level, shader_code, m_debug); - if (!blob) - return {}; - - if (!m_blob_file || std::fseek(m_blob_file, 0, SEEK_END) != 0) - return blob; - - CacheIndexData data; - data.file_offset = static_cast(std::ftell(m_blob_file)); - data.blob_size = static_cast(blob->GetBufferSize()); - - CacheIndexEntry entry = {}; - entry.source_hash_low = key.source_hash_low; - entry.source_hash_high = key.source_hash_high; - entry.source_length = key.source_length; - entry.shader_type = static_cast(key.shader_type); - entry.blob_size = data.blob_size; - entry.file_offset = data.file_offset; - - if (std::fwrite(blob->GetBufferPointer(), 1, entry.blob_size, m_blob_file) != entry.blob_size || - std::fflush(m_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_index_file) != 1 || - std::fflush(m_index_file) != 0) - { - Log_ErrorPrintf("Failed to write shader blob to file"); - return blob; - } - - m_index.emplace(key, data); - return blob; -} - -} // namespace D3D11 diff --git a/src/common/d3d11/shader_cache.h b/src/common/d3d11/shader_cache.h deleted file mode 100644 index a608cda7a..000000000 --- a/src/common/d3d11/shader_cache.h +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../hash_combine.h" -#include "../types.h" -#include "../windows_headers.h" -#include "shader_compiler.h" -#include 
-#include -#include -#include -#include -#include - -namespace D3D11 { - -class ShaderCache -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - ShaderCache(); - ~ShaderCache(); - - void Open(std::string_view base_path, D3D_FEATURE_LEVEL feature_level, u32 version, bool debug); - - ComPtr GetShaderBlob(ShaderCompiler::Type type, std::string_view shader_code); - - ComPtr GetVertexShader(ID3D11Device* device, std::string_view shader_code); - ComPtr GetGeometryShader(ID3D11Device* device, std::string_view shader_code); - ComPtr GetPixelShader(ID3D11Device* device, std::string_view shader_code); - ComPtr GetComputeShader(ID3D11Device* device, std::string_view shader_code); - -private: - static constexpr u32 FILE_VERSION = 2; - - struct CacheIndexKey - { - u64 source_hash_low; - u64 source_hash_high; - u32 source_length; - ShaderCompiler::Type shader_type; - - bool operator==(const CacheIndexKey& key) const; - bool operator!=(const CacheIndexKey& key) const; - }; - - struct CacheIndexEntryHasher - { - std::size_t operator()(const CacheIndexKey& e) const noexcept - { - std::size_t h = 0; - hash_combine(h, e.source_hash_low, e.source_hash_high, e.source_length, e.shader_type); - return h; - } - }; - - struct CacheIndexData - { - u32 file_offset; - u32 blob_size; - }; - - using CacheIndex = std::unordered_map; - - static std::string GetCacheBaseFileName(const std::string_view& base_path, D3D_FEATURE_LEVEL feature_level, - bool debug); - static CacheIndexKey GetCacheKey(ShaderCompiler::Type type, const std::string_view& shader_code); - - bool CreateNew(const std::string& index_filename, const std::string& blob_filename); - bool ReadExisting(const std::string& index_filename, const std::string& blob_filename); - void Close(); - - ComPtr CompileAndAddShaderBlob(const CacheIndexKey& key, std::string_view shader_code); - - std::FILE* m_index_file = nullptr; - std::FILE* m_blob_file = nullptr; - - CacheIndex m_index; - - D3D_FEATURE_LEVEL m_feature_level = 
D3D_FEATURE_LEVEL_11_0; - u32 m_version = 0; - bool m_debug = false; -}; - -} // namespace D3D11 diff --git a/src/common/d3d11/shader_compiler.cpp b/src/common/d3d11/shader_compiler.cpp deleted file mode 100644 index 197361064..000000000 --- a/src/common/d3d11/shader_compiler.cpp +++ /dev/null @@ -1,202 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "shader_compiler.h" -#include "../log.h" -#include "../string_util.h" -#include -#include -#include -Log_SetChannel(D3D11); - -namespace D3D11::ShaderCompiler { - -static unsigned s_next_bad_shader_id = 1; - -ComPtr CompileShader(Type type, D3D_FEATURE_LEVEL feature_level, std::string_view code, bool debug) -{ - const char* target; - switch (feature_level) - { - case D3D_FEATURE_LEVEL_10_0: - { - static constexpr std::array targets = {{"vs_4_0", "gs_4_0", "ps_4_0", "cs_4_0"}}; - target = targets[static_cast(type)]; - } - break; - - case D3D_FEATURE_LEVEL_10_1: - { - static constexpr std::array targets = {{"vs_4_1", "gs_4_1", "ps_4_1", "cs_4_1"}}; - target = targets[static_cast(type)]; - } - break; - - case D3D_FEATURE_LEVEL_11_0: - { - static constexpr std::array targets = {{"vs_5_0", "gs_5_0", "ps_5_0", "cs_5_0"}}; - target = targets[static_cast(type)]; - } - break; - - case D3D_FEATURE_LEVEL_11_1: - default: - { - static constexpr std::array targets = {{"vs_5_1", "gs_5_1", "ps_5_1", "cs_5_1"}}; - target = targets[static_cast(type)]; - } - break; - } - - static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3; - static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG; - - ComPtr blob; - ComPtr error_blob; - const HRESULT hr = - D3DCompile(code.data(), code.size(), "0", nullptr, nullptr, "main", target, debug ? 
flags_debug : flags_non_debug, - 0, blob.GetAddressOf(), error_blob.GetAddressOf()); - - std::string error_string; - if (error_blob) - { - error_string.append(static_cast(error_blob->GetBufferPointer()), error_blob->GetBufferSize()); - error_blob.Reset(); - } - - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to compile '%s':\n%s", target, error_string.c_str()); - - std::ofstream ofs(StringUtil::StdStringFromFormat("bad_shader_%u.txt", s_next_bad_shader_id++).c_str(), - std::ofstream::out | std::ofstream::binary); - if (ofs.is_open()) - { - ofs << code; - ofs << "\n\nCompile as " << target << " failed: " << hr << "\n"; - ofs.write(error_string.c_str(), error_string.size()); - ofs.close(); - } - - return {}; - } - - if (!error_string.empty()) - Log_WarningPrintf("'%s' compiled with warnings:\n%s", target, error_string.c_str()); - - return blob; -} - -ComPtr CompileAndCreateVertexShader(ID3D11Device* device, std::string_view code, bool debug) -{ - ComPtr blob = CompileShader(Type::Vertex, device->GetFeatureLevel(), std::move(code), debug); - if (!blob) - return {}; - - return CreateVertexShader(device, blob.Get()); -} - -ComPtr CompileAndCreateGeometryShader(ID3D11Device* device, std::string_view code, bool debug) -{ - ComPtr blob = CompileShader(Type::Geometry, device->GetFeatureLevel(), std::move(code), debug); - if (!blob) - return {}; - - return CreateGeometryShader(device, blob.Get()); -} - -ComPtr CompileAndCreatePixelShader(ID3D11Device* device, std::string_view code, bool debug) -{ - ComPtr blob = CompileShader(Type::Pixel, device->GetFeatureLevel(), std::move(code), debug); - if (!blob) - return {}; - - return CreatePixelShader(device, blob.Get()); -} - -ComPtr CompileAndCreateComputeShader(ID3D11Device* device, std::string_view code, bool debug) -{ - ComPtr blob = CompileShader(Type::Compute, device->GetFeatureLevel(), std::move(code), debug); - if (!blob) - return {}; - - return CreateComputeShader(device, blob.Get()); -} - -ComPtr 
CreateVertexShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length) -{ - ComPtr shader; - const HRESULT hr = device->CreateVertexShader(bytecode, bytecode_length, nullptr, shader.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create vertex shader: 0x%08X", hr); - return {}; - } - - return shader; -} - -ComPtr CreateVertexShader(ID3D11Device* device, const ID3DBlob* blob) -{ - return CreateVertexShader(device, const_cast(blob)->GetBufferPointer(), - const_cast(blob)->GetBufferSize()); -} - -ComPtr CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length) -{ - ComPtr shader; - const HRESULT hr = device->CreateGeometryShader(bytecode, bytecode_length, nullptr, shader.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create geometry shader: 0x%08X", hr); - return {}; - } - - return shader; -} - -ComPtr CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob) -{ - return CreateGeometryShader(device, const_cast(blob)->GetBufferPointer(), - const_cast(blob)->GetBufferSize()); -} - -ComPtr CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length) -{ - ComPtr shader; - const HRESULT hr = device->CreatePixelShader(bytecode, bytecode_length, nullptr, shader.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create pixel shader: 0x%08X", hr); - return {}; - } - - return shader; -} - -ComPtr CreatePixelShader(ID3D11Device* device, const ID3DBlob* blob) -{ - return CreatePixelShader(device, const_cast(blob)->GetBufferPointer(), - const_cast(blob)->GetBufferSize()); -} - -ComPtr CreateComputeShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length) -{ - ComPtr shader; - const HRESULT hr = device->CreateComputeShader(bytecode, bytecode_length, nullptr, shader.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create compute shader: 0x%08X", hr); - return {}; - } - - return shader; -} - -ComPtr 
CreateComputeShader(ID3D11Device* device, const ID3DBlob* blob) -{ - return CreateComputeShader(device, const_cast(blob)->GetBufferPointer(), - const_cast(blob)->GetBufferSize()); -} - -} // namespace D3D11::ShaderCompiler \ No newline at end of file diff --git a/src/common/d3d11/shader_compiler.h b/src/common/d3d11/shader_compiler.h deleted file mode 100644 index 176c45158..000000000 --- a/src/common/d3d11/shader_compiler.h +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../windows_headers.h" -#include -#include -#include -#include - -namespace D3D11::ShaderCompiler { -template -using ComPtr = Microsoft::WRL::ComPtr; - -enum class Type -{ - Vertex, - Geometry, - Pixel, - Compute -}; - -ComPtr CompileShader(Type type, D3D_FEATURE_LEVEL feature_level, std::string_view code, bool debug); - -ComPtr CompileAndCreateVertexShader(ID3D11Device* device, std::string_view code, bool debug); -ComPtr CompileAndCreateGeometryShader(ID3D11Device* device, std::string_view code, bool debug); -ComPtr CompileAndCreatePixelShader(ID3D11Device* device, std::string_view code, bool debug); -ComPtr CompileAndCreateComputeShader(ID3D11Device* device, std::string_view code, bool debug); - -ComPtr CreateVertexShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length); -ComPtr CreateVertexShader(ID3D11Device* device, const ID3DBlob* blob); -ComPtr CreateGeometryShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length); -ComPtr CreateGeometryShader(ID3D11Device* device, const ID3DBlob* blob); -ComPtr CreatePixelShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length); -ComPtr CreatePixelShader(ID3D11Device* device, const ID3DBlob* blob); -ComPtr CreateComputeShader(ID3D11Device* device, const void* bytecode, size_t bytecode_length); -ComPtr CreateComputeShader(ID3D11Device* device, const ID3DBlob* blob); - -}; // 
namespace D3D11::ShaderCompiler diff --git a/src/common/d3d11/texture.cpp b/src/common/d3d11/texture.cpp deleted file mode 100644 index df2d3f599..000000000 --- a/src/common/d3d11/texture.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "texture.h" -#include "../log.h" -#include -Log_SetChannel(D3D11); - -static constexpr std::array(GPUTexture::Format::Count)> s_dxgi_mapping = { - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B5G6R5_UNORM, - DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_D16_UNORM}}; - -D3D11::Texture::Texture() = default; - -D3D11::Texture::Texture(ComPtr texture, ComPtr srv, - ComPtr rtv) - : m_texture(std::move(texture)), m_srv(std::move(srv)), m_rtv(std::move(rtv)) -{ - const D3D11_TEXTURE2D_DESC desc = GetDesc(); - m_width = static_cast(desc.Width); - m_height = static_cast(desc.Height); - m_layers = static_cast(desc.ArraySize); - m_levels = static_cast(desc.MipLevels); - m_samples = static_cast(desc.SampleDesc.Count); - m_format = LookupBaseFormat(desc.Format); - m_dynamic = (desc.Usage == D3D11_USAGE_DYNAMIC); -} - -D3D11::Texture::~Texture() -{ - Destroy(); -} - -DXGI_FORMAT D3D11::Texture::GetDXGIFormat(Format format) -{ - return s_dxgi_mapping[static_cast(format)]; -} - -GPUTexture::Format D3D11::Texture::LookupBaseFormat(DXGI_FORMAT dformat) -{ - for (u32 i = 0; i < static_cast(s_dxgi_mapping.size()); i++) - { - if (s_dxgi_mapping[i] == dformat) - return static_cast(i); - } - return GPUTexture::Format::Unknown; -} - -D3D11_TEXTURE2D_DESC D3D11::Texture::GetDesc() const -{ - D3D11_TEXTURE2D_DESC desc; - m_texture->GetDesc(&desc); - return desc; -} - -bool D3D11::Texture::IsValid() const -{ - return static_cast(m_texture); -} - -bool D3D11::Texture::Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, - Format format, u32 bind_flags, 
const void* initial_data /* = nullptr */, - u32 initial_data_stride /* = 0 */, bool dynamic /* = false */) -{ - if (width > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION || height > D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION || - layers > D3D11_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION || (layers > 1 && samples > 1)) - { - Log_ErrorPrintf("Texture bounds (%ux%ux%u, %u mips, %u samples) are too large", width, height, layers, levels, - samples); - return false; - } - - CD3D11_TEXTURE2D_DESC desc(GetDXGIFormat(format), width, height, layers, levels, bind_flags, - dynamic ? D3D11_USAGE_DYNAMIC : D3D11_USAGE_DEFAULT, dynamic ? D3D11_CPU_ACCESS_WRITE : 0, - samples, 0, 0); - - D3D11_SUBRESOURCE_DATA srd; - srd.pSysMem = initial_data; - srd.SysMemPitch = initial_data_stride; - srd.SysMemSlicePitch = initial_data_stride * height; - - ComPtr texture; - const HRESULT tex_hr = device->CreateTexture2D(&desc, initial_data ? &srd : nullptr, texture.GetAddressOf()); - if (FAILED(tex_hr)) - { - Log_ErrorPrintf( - "Create texture failed: 0x%08X (%ux%u levels:%u samples:%u format:%u bind_flags:%X initial_data:%p)", tex_hr, - width, height, levels, samples, static_cast(format), bind_flags, initial_data); - return false; - } - - ComPtr srv; - if (bind_flags & D3D11_BIND_SHADER_RESOURCE) - { - const D3D11_SRV_DIMENSION srv_dimension = - (desc.SampleDesc.Count > 1) ? - D3D11_SRV_DIMENSION_TEXTURE2DMS : - (desc.ArraySize > 1 ? D3D11_SRV_DIMENSION_TEXTURE2DARRAY : D3D11_SRV_DIMENSION_TEXTURE2D); - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, desc.Format, 0, desc.MipLevels, 0, desc.ArraySize); - const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create SRV for texture failed: 0x%08X", hr); - return false; - } - } - - ComPtr rtv; - if (bind_flags & D3D11_BIND_RENDER_TARGET) - { - const D3D11_RTV_DIMENSION rtv_dimension = - (desc.SampleDesc.Count > 1) ? 
D3D11_RTV_DIMENSION_TEXTURE2DMS : D3D11_RTV_DIMENSION_TEXTURE2D; - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, desc.Format, 0, 0, desc.ArraySize); - const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create RTV for texture failed: 0x%08X", hr); - return false; - } - } - - m_texture = std::move(texture); - m_srv = std::move(srv); - m_rtv = std::move(rtv); - m_width = static_cast(width); - m_height = static_cast(height); - m_layers = static_cast(layers); - m_levels = static_cast(levels); - m_samples = static_cast(samples); - m_format = format; - m_dynamic = dynamic; - return true; -} - -bool D3D11::Texture::Adopt(ID3D11Device* device, ComPtr texture) -{ - D3D11_TEXTURE2D_DESC desc; - texture->GetDesc(&desc); - - ComPtr srv; - if (desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) - { - const D3D11_SRV_DIMENSION srv_dimension = - (desc.SampleDesc.Count > 1) ? D3D11_SRV_DIMENSION_TEXTURE2DMS : D3D11_SRV_DIMENSION_TEXTURE2D; - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, desc.Format, 0, desc.MipLevels, 0, desc.ArraySize); - const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create SRV for adopted texture failed: 0x%08X", hr); - return false; - } - } - - ComPtr rtv; - if (desc.BindFlags & D3D11_BIND_RENDER_TARGET) - { - const D3D11_RTV_DIMENSION rtv_dimension = - (desc.SampleDesc.Count > 1) ? 
D3D11_RTV_DIMENSION_TEXTURE2DMS : D3D11_RTV_DIMENSION_TEXTURE2D; - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, desc.Format, 0, 0, desc.ArraySize); - const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create RTV for adopted texture failed: 0x%08X", hr); - return false; - } - } - - m_texture = std::move(texture); - m_srv = std::move(srv); - m_rtv = std::move(rtv); - m_width = static_cast(desc.Width); - m_height = static_cast(desc.Height); - m_layers = static_cast(desc.ArraySize); - m_levels = static_cast(desc.MipLevels); - m_samples = static_cast(desc.SampleDesc.Count); - m_dynamic = (desc.Usage == D3D11_USAGE_DYNAMIC); - return true; -} - -void D3D11::Texture::Destroy() -{ - m_rtv.Reset(); - m_srv.Reset(); - m_texture.Reset(); - m_dynamic = false; - ClearBaseProperties(); -} diff --git a/src/common/d3d11/texture.h b/src/common/d3d11/texture.h deleted file mode 100644 index 6060387af..000000000 --- a/src/common/d3d11/texture.h +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../gpu_texture.h" -#include "../windows_headers.h" -#include -#include - -namespace D3D11 { - -class Texture final : public GPUTexture -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - Texture(); - Texture(ComPtr texture, ComPtr srv, ComPtr rtv); - ~Texture(); - - static DXGI_FORMAT GetDXGIFormat(Format format); - static Format LookupBaseFormat(DXGI_FORMAT dformat); - - ALWAYS_INLINE ID3D11Texture2D* GetD3DTexture() const { return m_texture.Get(); } - ALWAYS_INLINE ID3D11ShaderResourceView* GetD3DSRV() const { return m_srv.Get(); } - ALWAYS_INLINE ID3D11RenderTargetView* GetD3DRTV() const { return m_rtv.Get(); } - ALWAYS_INLINE ID3D11ShaderResourceView* const* GetD3DSRVArray() const { return m_srv.GetAddressOf(); } - ALWAYS_INLINE 
ID3D11RenderTargetView* const* GetD3DRTVArray() const { return m_rtv.GetAddressOf(); } - ALWAYS_INLINE DXGI_FORMAT GetDXGIFormat() const { return GetDXGIFormat(m_format); } - ALWAYS_INLINE bool IsDynamic() const { return m_dynamic; } - - ALWAYS_INLINE operator ID3D11Texture2D*() const { return m_texture.Get(); } - ALWAYS_INLINE operator ID3D11ShaderResourceView*() const { return m_srv.Get(); } - ALWAYS_INLINE operator ID3D11RenderTargetView*() const { return m_rtv.Get(); } - ALWAYS_INLINE operator bool() const { return static_cast(m_texture); } - - D3D11_TEXTURE2D_DESC GetDesc() const; - - bool IsValid() const override; - - bool Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Format format, - u32 bind_flags, const void* initial_data = nullptr, u32 initial_data_stride = 0, bool dynamic = false); - bool Adopt(ID3D11Device* device, ComPtr texture); - - void Destroy(); - -private: - ComPtr m_texture; - ComPtr m_srv; - ComPtr m_rtv; - bool m_dynamic = false; -}; - -} // namespace D3D11 \ No newline at end of file diff --git a/src/common/d3d12/context.cpp b/src/common/d3d12/context.cpp deleted file mode 100644 index 186b1b30b..000000000 --- a/src/common/d3d12/context.cpp +++ /dev/null @@ -1,556 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -// Parts originally from Dolphin Emulator, also written by myself. 
- -#include "context.h" -#include "../assert.h" -#include "../log.h" -#include "../scoped_guard.h" -#include -#include -#include -#include -#include -Log_SetChannel(D3D12::Context); - -std::unique_ptr g_d3d12_context; - -namespace D3D12 { - -#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) - -// Private D3D12 state -static HMODULE s_d3d12_library; -static PFN_D3D12_CREATE_DEVICE s_d3d12_create_device; -static PFN_D3D12_GET_DEBUG_INTERFACE s_d3d12_get_debug_interface; -static PFN_D3D12_SERIALIZE_ROOT_SIGNATURE s_d3d12_serialize_root_signature; - -static bool LoadD3D12Library() -{ - if ((s_d3d12_library = LoadLibrary("d3d12.dll")) == nullptr || - (s_d3d12_create_device = - reinterpret_cast(GetProcAddress(s_d3d12_library, "D3D12CreateDevice"))) == nullptr || - (s_d3d12_get_debug_interface = reinterpret_cast( - GetProcAddress(s_d3d12_library, "D3D12GetDebugInterface"))) == nullptr || - (s_d3d12_serialize_root_signature = reinterpret_cast( - GetProcAddress(s_d3d12_library, "D3D12SerializeRootSignature"))) == nullptr) - { - Log_ErrorPrintf("d3d12.dll could not be loaded."); - s_d3d12_create_device = nullptr; - s_d3d12_get_debug_interface = nullptr; - s_d3d12_serialize_root_signature = nullptr; - if (s_d3d12_library) - FreeLibrary(s_d3d12_library); - s_d3d12_library = nullptr; - return false; - } - - return true; -} - -static void UnloadD3D12Library() -{ - s_d3d12_serialize_root_signature = nullptr; - s_d3d12_get_debug_interface = nullptr; - s_d3d12_create_device = nullptr; - if (s_d3d12_library) - { - FreeLibrary(s_d3d12_library); - s_d3d12_library = nullptr; - } -} - -#else - -static const PFN_D3D12_CREATE_DEVICE s_d3d12_create_device = D3D12CreateDevice; -static const PFN_D3D12_GET_DEBUG_INTERFACE s_d3d12_get_debug_interface = D3D12GetDebugInterface; -static const PFN_D3D12_SERIALIZE_ROOT_SIGNATURE s_d3d12_serialize_root_signature = D3D12SerializeRootSignature; - -static bool LoadD3D12Library() -{ - return true; -} - -static void UnloadD3D12Library() {} - -#endif 
- -Context::Context() = default; - -Context::~Context() -{ - DestroyResources(); -} - -Context::ComPtr Context::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc) -{ - ComPtr blob; - ComPtr error_blob; - const HRESULT hr = s_d3d12_serialize_root_signature(desc, D3D_ROOT_SIGNATURE_VERSION_1, blob.GetAddressOf(), - error_blob.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("D3D12SerializeRootSignature() failed: %08X", hr); - if (error_blob) - Log_ErrorPrintf("%s", error_blob->GetBufferPointer()); - - return {}; - } - - return blob; -} - -D3D12::Context::ComPtr Context::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc) -{ - ComPtr blob = SerializeRootSignature(desc); - if (!blob) - return {}; - - ComPtr rs; - const HRESULT hr = - m_device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(rs.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateRootSignature() failed: %08X", hr); - return {}; - } - - return rs; -} - -bool Context::SupportsTextureFormat(DXGI_FORMAT format) -{ - constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; - - D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {format}; - return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))) && - (support.Support1 & required) == required; -} - -bool Context::Create(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer) -{ - Assert(!g_d3d12_context); - - if (!LoadD3D12Library()) - return false; - - g_d3d12_context.reset(new Context()); - if (!g_d3d12_context->CreateDevice(dxgi_factory, adapter_index, enable_debug_layer) || - !g_d3d12_context->CreateCommandQueue() || !g_d3d12_context->CreateFence() || - !g_d3d12_context->CreateDescriptorHeaps() || !g_d3d12_context->CreateCommandLists() || - !g_d3d12_context->CreateTimestampQuery() || !g_d3d12_context->CreateTextureStreamBuffer()) - { - Destroy(); - return false; - } - - return true; -} - 
-void Context::Destroy() -{ - if (g_d3d12_context) - g_d3d12_context.reset(); - - UnloadD3D12Library(); -} - -bool Context::CreateDevice(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer) -{ - ComPtr adapter; - HRESULT hr = dxgi_factory->EnumAdapters(adapter_index, &adapter); - if (FAILED(hr)) - { - Log_ErrorPrintf("Adapter %u not found, using default", adapter_index); - adapter = nullptr; - } - else - { - DXGI_ADAPTER_DESC adapter_desc; - if (SUCCEEDED(adapter->GetDesc(&adapter_desc))) - { - char adapter_name_buffer[128]; - const int name_length = WideCharToMultiByte(CP_UTF8, 0, adapter_desc.Description, - static_cast(std::wcslen(adapter_desc.Description)), - adapter_name_buffer, countof(adapter_name_buffer), 0, nullptr); - if (name_length >= 0) - { - adapter_name_buffer[name_length] = 0; - Log_InfoPrintf("D3D Adapter: %s", adapter_name_buffer); - } - } - } - - // Enabling the debug layer will fail if the Graphics Tools feature is not installed. - if (enable_debug_layer) - { - hr = s_d3d12_get_debug_interface(IID_PPV_ARGS(&m_debug_interface)); - if (SUCCEEDED(hr)) - { - m_debug_interface->EnableDebugLayer(); - } - else - { - Log_ErrorPrintf("Debug layer requested but not available."); - enable_debug_layer = false; - } - } - - // Create the actual device. 
- hr = s_d3d12_create_device(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); - AssertMsg(SUCCEEDED(hr), "Create D3D12 device"); - if (FAILED(hr)) - return false; - - if (enable_debug_layer) - { - ComPtr info_queue; - if (SUCCEEDED(m_device.As(&info_queue))) - { - info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); - info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); - - D3D12_INFO_QUEUE_FILTER filter = {}; - std::array id_list{ - D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, - D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, - D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, - D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, - D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, - }; - filter.DenyList.NumIDs = static_cast(id_list.size()); - filter.DenyList.pIDList = id_list.data(); - info_queue->PushStorageFilter(&filter); - } - } - - return true; -} - -bool Context::CreateCommandQueue() -{ - const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, - D3D12_COMMAND_QUEUE_FLAG_NONE}; - HRESULT hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue)); - AssertMsg(SUCCEEDED(hr), "Create command queue"); - return SUCCEEDED(hr); -} - -bool Context::CreateFence() -{ - HRESULT hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)); - AssertMsg(SUCCEEDED(hr), "Create fence"); - if (FAILED(hr)) - return false; - - m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); - AssertMsg(m_fence_event != NULL, "Create fence event"); - if (!m_fence_event) - return false; - - return true; -} - -bool Context::CreateDescriptorHeaps() -{ - static constexpr size_t MAX_SRVS = 16384; - static constexpr size_t MAX_RTVS = 8192; - static constexpr size_t MAX_DSVS = 128; - static constexpr size_t MAX_SAMPLERS = 128; - - if 
(!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, MAX_SRVS, true) || - !m_rtv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_RTVS, false) || - !m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_DSVS, false) || - !m_sampler_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_SAMPLERS, true)) - { - return false; - } - - m_gpu_descriptor_heaps[0] = m_descriptor_heap_manager.GetDescriptorHeap(); - m_gpu_descriptor_heaps[1] = m_sampler_heap_manager.GetDescriptorHeap(); - - // Allocate null SRV descriptor for unbound textures. - constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, - D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; - - if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor)) - { - Panic("Failed to allocate null descriptor"); - return false; - } - - m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle); - return true; -} - -bool Context::CreateCommandLists() -{ - for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) - { - CommandListResources& res = m_command_lists[i]; - HRESULT hr = m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(res.command_allocator.GetAddressOf())); - AssertMsg(SUCCEEDED(hr), "Create command allocator"); - if (FAILED(hr)) - return false; - - hr = m_device->CreateCommandList(1, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocator.Get(), nullptr, - IID_PPV_ARGS(res.command_list.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create command list: %08X", hr); - return false; - } - - // Close the command list, since the first thing we do is reset them. 
- hr = res.command_list->Close(); - AssertMsg(SUCCEEDED(hr), "Closing new command list failed"); - if (FAILED(hr)) - return false; - } - - MoveToNextCommandList(); - return true; -} - -bool Context::CreateTextureStreamBuffer() -{ - return m_texture_stream_buffer.Create(TEXTURE_UPLOAD_BUFFER_SIZE); -} - -void Context::MoveToNextCommandList() -{ - m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS; - m_current_fence_value++; - - // We may have to wait if this command list hasn't finished on the GPU. - CommandListResources& res = m_command_lists[m_current_command_list]; - WaitForFence(res.ready_fence_value); - res.ready_fence_value = m_current_fence_value; - - // Begin command list. - res.command_allocator->Reset(); - res.command_list->Reset(res.command_allocator.Get(), nullptr); - - if (res.has_timestamp_query) - { - // readback timestamp from the last time this cmdlist was used. - // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist. 
- const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); - const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)}; - void* map; - HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map); - if (SUCCEEDED(hr)) - { - u64 timestamps[2]; - std::memcpy(timestamps, static_cast(map) + offset, sizeof(timestamps)); - m_accumulated_gpu_time += - static_cast(static_cast(timestamps[1] - timestamps[0]) / m_timestamp_frequency); - - const D3D12_RANGE write_range = {}; - m_timestamp_query_buffer->Unmap(0, &write_range); - } - else - { - Log_WarningPrintf("Map() for timestamp query failed: %08X", hr); - } - } - - res.has_timestamp_query = m_gpu_timing_enabled; - if (m_gpu_timing_enabled) - { - res.command_list->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, - m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST); - } - - res.command_list->SetDescriptorHeaps(static_cast(m_gpu_descriptor_heaps.size()), m_gpu_descriptor_heaps.data()); -} - -void Context::ExecuteCommandList(bool wait_for_completion) -{ - CommandListResources& res = m_command_lists[m_current_command_list]; - HRESULT hr; - - if (res.has_timestamp_query) - { - // write the timestamp back at the end of the cmdlist - res.command_list->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, - (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1); - res.command_list->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, - m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, - NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(), - m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); - } - - // Close and queue command list. 
- hr = res.command_list->Close(); - AssertMsg(SUCCEEDED(hr), "Close command list"); - const std::array execute_lists{res.command_list.Get()}; - m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); - - // Update fence when GPU has completed. - hr = m_command_queue->Signal(m_fence.Get(), m_current_fence_value); - AssertMsg(SUCCEEDED(hr), "Signal fence"); - - MoveToNextCommandList(); - if (wait_for_completion) - WaitForFence(res.ready_fence_value); -} - -void Context::DeferResourceDestruction(ID3D12Resource* resource) -{ - if (!resource) - return; - - resource->AddRef(); - m_command_lists[m_current_command_list].pending_resources.push_back(resource); -} - -void Context::DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index) -{ - m_command_lists[m_current_command_list].pending_descriptors.emplace_back(manager, index); -} - -void Context::DeferDescriptorDestruction(DescriptorHeapManager& manager, DescriptorHandle* handle) -{ - if (handle->index == DescriptorHandle::INVALID_INDEX) - return; - - m_command_lists[m_current_command_list].pending_descriptors.emplace_back(manager, handle->index); - handle->Clear(); -} - -void Context::DestroyPendingResources(CommandListResources& cmdlist) -{ - for (const auto& dd : cmdlist.pending_descriptors) - dd.first.Free(dd.second); - cmdlist.pending_descriptors.clear(); - - for (ID3D12Resource* res : cmdlist.pending_resources) - res->Release(); - cmdlist.pending_resources.clear(); -} - -void Context::DestroyResources() -{ - ExecuteCommandList(true); - - m_timestamp_query_buffer.Reset(); - m_timestamp_query_heap.Reset(); - m_texture_stream_buffer.Destroy(false); - m_descriptor_heap_manager.Free(&m_null_srv_descriptor); - m_sampler_heap_manager.Destroy(); - m_dsv_heap_manager.Destroy(); - m_rtv_heap_manager.Destroy(); - m_descriptor_heap_manager.Destroy(); - m_command_lists = {}; - m_current_command_list = 0; - m_completed_fence_value = 0; - m_current_fence_value = 0; - if 
(m_fence_event) - { - CloseHandle(m_fence_event); - m_fence_event = {}; - } - - m_command_queue.Reset(); - m_debug_interface.Reset(); - m_device.Reset(); -} - -void Context::WaitForFence(u64 fence) -{ - if (m_completed_fence_value >= fence) - return; - - // Try non-blocking check. - m_completed_fence_value = m_fence->GetCompletedValue(); - if (m_completed_fence_value < fence) - { - // Fall back to event. - HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event); - AssertMsg(SUCCEEDED(hr), "Set fence event on completion"); - WaitForSingleObject(m_fence_event, INFINITE); - m_completed_fence_value = m_fence->GetCompletedValue(); - } - - // Release resources for as many command lists which have completed. - u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; - for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) - { - CommandListResources& res = m_command_lists[index]; - if (m_completed_fence_value < res.ready_fence_value) - break; - - DestroyPendingResources(res); - index = (index + 1) % NUM_COMMAND_LISTS; - } -} - -void Context::WaitForGPUIdle() -{ - u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; - for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++) - { - WaitForFence(m_command_lists[index].ready_fence_value); - index = (index + 1) % NUM_COMMAND_LISTS; - } -} - -bool Context::CreateTimestampQuery() -{ - constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS; - constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT; - - const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT}; - HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.ReleaseAndGetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateQueryHeap() for timestamp failed with %08X", hr); - return false; - } - - const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_READBACK}; - const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, - 0, - BUFFER_SIZE, - 1, - 1, - 1, - 
DXGI_FORMAT_UNKNOWN, - {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_NONE}; - hr = m_device->CreateCommittedResource(&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, - D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(m_timestamp_query_buffer.ReleaseAndGetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateResource() for timestamp failed with %08X", hr); - return false; - } - - u64 frequency; - hr = m_command_queue->GetTimestampFrequency(&frequency); - if (FAILED(hr)) - { - Log_ErrorPrintf("GetTimestampFrequency() failed: %08X", hr); - return false; - } - - m_timestamp_frequency = static_cast(frequency) / 1000.0; - return true; -} - -float Context::GetAndResetAccumulatedGPUTime() -{ - const float time = m_accumulated_gpu_time; - m_accumulated_gpu_time = 0.0f; - return time; -} - -void Context::SetEnableGPUTiming(bool enabled) -{ - m_gpu_timing_enabled = enabled; -} -} // namespace D3D12 diff --git a/src/common/d3d12/context.h b/src/common/d3d12/context.h deleted file mode 100644 index 8bdc8e99b..000000000 --- a/src/common/d3d12/context.h +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -// Parts originally from Dolphin Emulator, also written by myself. - -#pragma once - -#include "../types.h" -#include "../windows_headers.h" -#include "descriptor_heap_manager.h" -#include "stream_buffer.h" -#include -#include -#include -#include -#include - -struct IDXGIFactory; - -namespace D3D12 { - -class Context -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - enum : u32 - { - // Number of command lists. One is being built while the other(s) are executed. - NUM_COMMAND_LISTS = 3, - - // Textures that don't fit into this buffer will be uploaded with a staging buffer. - TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024, - - /// Start/End timestamp queries. 
- NUM_TIMESTAMP_QUERIES_PER_CMDLIST = 2, - }; - - ~Context(); - - // Creates new device and context. - static bool Create(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer); - - // Destroys active context. - static void Destroy(); - - ID3D12Device* GetDevice() const { return m_device.Get(); } - ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.Get(); } - - // Returns the current command list, commands can be recorded directly. - ID3D12GraphicsCommandList* GetCommandList() const - { - return m_command_lists[m_current_command_list].command_list.Get(); - } - - // Descriptor manager access. - DescriptorHeapManager& GetDescriptorHeapManager() { return m_descriptor_heap_manager; } - DescriptorHeapManager& GetRTVHeapManager() { return m_rtv_heap_manager; } - DescriptorHeapManager& GetDSVHeapManager() { return m_dsv_heap_manager; } - DescriptorHeapManager& GetSamplerHeapManager() { return m_sampler_heap_manager; } - ID3D12DescriptorHeap* const* GetGPUDescriptorHeaps() const { return m_gpu_descriptor_heaps.data(); } - u32 GetGPUDescriptorHeapCount() const { return static_cast(m_gpu_descriptor_heaps.size()); } - const DescriptorHandle& GetNullSRVDescriptor() const { return m_null_srv_descriptor; } - StreamBuffer& GetTextureStreamBuffer() { return m_texture_stream_buffer; } - - // Root signature access. - ComPtr SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc); - ComPtr CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc); - - // Fence value for current command list. - u64 GetCurrentFenceValue() const { return m_current_fence_value; } - - // Last "completed" fence. - u64 GetCompletedFenceValue() const { return m_completed_fence_value; } - - // Feature level to use when compiling shaders. - D3D_FEATURE_LEVEL GetFeatureLevel() const { return m_feature_level; } - - // Test for support for the specified texture format. - bool SupportsTextureFormat(DXGI_FORMAT format); - - // Executes the current command list. 
- void ExecuteCommandList(bool wait_for_completion); - - // Waits for a specific fence. - void WaitForFence(u64 fence); - - // Waits for any in-flight command buffers to complete. - void WaitForGPUIdle(); - - // Defers destruction of a D3D resource (associates it with the current list). - void DeferResourceDestruction(ID3D12Resource* resource); - - // Defers destruction of a descriptor handle (associates it with the current list). - void DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index); - void DeferDescriptorDestruction(DescriptorHeapManager& manager, DescriptorHandle* handle); - - float GetAndResetAccumulatedGPUTime(); - void SetEnableGPUTiming(bool enabled); - -private: - struct CommandListResources - { - ComPtr command_allocator; - ComPtr command_list; - std::vector pending_resources; - std::vector> pending_descriptors; - u64 ready_fence_value = 0; - bool has_timestamp_query = false; - }; - - Context(); - - bool CreateDevice(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_debug_layer); - bool CreateCommandQueue(); - bool CreateFence(); - bool CreateDescriptorHeaps(); - bool CreateCommandLists(); - bool CreateTextureStreamBuffer(); - bool CreateTimestampQuery(); - void MoveToNextCommandList(); - void DestroyPendingResources(CommandListResources& cmdlist); - void DestroyResources(); - - ComPtr m_debug_interface; - ComPtr m_device; - ComPtr m_command_queue; - - ComPtr m_fence = nullptr; - HANDLE m_fence_event = {}; - u32 m_current_fence_value = 0; - u64 m_completed_fence_value = 0; - - std::array m_command_lists; - u32 m_current_command_list = NUM_COMMAND_LISTS - 1; - - ComPtr m_timestamp_query_heap; - ComPtr m_timestamp_query_buffer; - double m_timestamp_frequency = 0.0; - float m_accumulated_gpu_time = 0.0f; - bool m_gpu_timing_enabled = false; - - DescriptorHeapManager m_descriptor_heap_manager; - DescriptorHeapManager m_rtv_heap_manager; - DescriptorHeapManager m_dsv_heap_manager; - DescriptorHeapManager m_sampler_heap_manager; - 
std::array m_gpu_descriptor_heaps = {}; - DescriptorHandle m_null_srv_descriptor; - StreamBuffer m_texture_stream_buffer; - - D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; -}; - -} // namespace D3D12 - -extern std::unique_ptr g_d3d12_context; diff --git a/src/common/d3d12/descriptor_heap_manager.cpp b/src/common/d3d12/descriptor_heap_manager.cpp deleted file mode 100644 index ca215fc62..000000000 --- a/src/common/d3d12/descriptor_heap_manager.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -// Parts originally from Dolphin Emulator, also written by myself. - -#include "descriptor_heap_manager.h" -#include "../assert.h" -#include "../log.h" -#include "context.h" -Log_SetChannel(DescriptorHeapManager); - -namespace D3D12 { -DescriptorHeapManager::DescriptorHeapManager() = default; -DescriptorHeapManager::~DescriptorHeapManager() = default; - -bool DescriptorHeapManager::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors, - bool shader_visible) -{ - D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast(num_descriptors), - shader_visible ? D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : - D3D12_DESCRIPTOR_HEAP_FLAG_NONE}; - - HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_descriptor_heap)); - AssertMsg(SUCCEEDED(hr), "Create descriptor heap"); - if (FAILED(hr)) - return false; - - m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); - if (shader_visible) - m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); - m_num_descriptors = num_descriptors; - m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type); - - // Set all slots to unallocated (1) - const u32 bitset_count = num_descriptors / BITSET_SIZE + (((num_descriptors % BITSET_SIZE) != 0) ? 
1 : 0); - m_free_slots.resize(bitset_count); - for (BitSetType& bs : m_free_slots) - bs.flip(); - - return true; -} - -void DescriptorHeapManager::Destroy() -{ - for (BitSetType& bs : m_free_slots) - Assert(bs.all()); - - m_num_descriptors = 0; - m_descriptor_increment_size = 0; - m_heap_base_cpu = {}; - m_heap_base_gpu = {}; - m_descriptor_heap.Reset(); - m_free_slots.clear(); -} - -bool DescriptorHeapManager::Allocate(DescriptorHandle* handle, u32 count /* = 1 */) -{ - // Start past the temporary slots, no point in searching those. - for (u32 group = 0; group < m_free_slots.size(); group++) - { - BitSetType& bs = m_free_slots[group]; - if (bs.none()) - continue; - - u32 bit = 0; - for (; bit < BITSET_SIZE; bit++) - { - if (bs[bit]) - { - u32 offset; - for (offset = 0; offset < count; offset++) - { - if (!bs[bit + offset]) - break; - } - - if (offset == count) - break; - } - } - - u32 index = group * BITSET_SIZE + bit; - for (u32 offset = 0; offset < count; offset++) - bs[bit + offset] = false; - - handle->index = index; - handle->cpu_handle.ptr = m_heap_base_cpu.ptr + index * m_descriptor_increment_size; - handle->gpu_handle.ptr = m_heap_base_gpu.ptr + index * m_descriptor_increment_size; - return true; - } - - Panic("Out of fixed descriptors"); - return false; -} - -void DescriptorHeapManager::Free(u32 index, u32 count /* = 1 */) -{ - Assert(index < m_num_descriptors); - - for (u32 i = 0; i < count; i++, index++) - { - u32 group = index / BITSET_SIZE; - u32 bit = index % BITSET_SIZE; - m_free_slots[group][bit] = true; - } -} - -void DescriptorHeapManager::Free(DescriptorHandle* handle, u32 count /* = 1 */) -{ - if (handle->index == DescriptorHandle::INVALID_INDEX) - return; - - Free(handle->index, count); - handle->Clear(); -} - -} // namespace D3D12 diff --git a/src/common/d3d12/descriptor_heap_manager.h b/src/common/d3d12/descriptor_heap_manager.h deleted file mode 100644 index c5ad8267b..000000000 --- a/src/common/d3d12/descriptor_heap_manager.h +++ /dev/null 
@@ -1,84 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -// Parts originally from Dolphin Emulator, also written by myself. - -#pragma once - -#include "../types.h" -#include "../windows_headers.h" -#include -#include -#include -#include -#include - -namespace D3D12 { -// This class provides an abstraction for D3D12 descriptor heaps. -struct DescriptorHandle final -{ - enum : u32 - { - INVALID_INDEX = 0xFFFFFFFF - }; - - D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle{}; - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle{}; - u32 index = INVALID_INDEX; - - ALWAYS_INLINE operator bool() const { return index != INVALID_INDEX; } - - ALWAYS_INLINE operator D3D12_CPU_DESCRIPTOR_HANDLE() const { return cpu_handle; } - ALWAYS_INLINE operator D3D12_GPU_DESCRIPTOR_HANDLE() const { return gpu_handle; } - - ALWAYS_INLINE void Clear() - { - cpu_handle = {}; - gpu_handle = {}; - index = INVALID_INDEX; - } -}; - -class DescriptorHeapManager final -{ -public: - DescriptorHeapManager(); - ~DescriptorHeapManager(); - - ALWAYS_INLINE ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } - ALWAYS_INLINE u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; } - - ALWAYS_INLINE D3D12_CPU_DESCRIPTOR_HANDLE OffsetCPUHandle(D3D12_CPU_DESCRIPTOR_HANDLE handle, u32 count) const - { - D3D12_CPU_DESCRIPTOR_HANDLE ret; - ret.ptr = handle.ptr + m_descriptor_increment_size * count; - return ret; - } - - ALWAYS_INLINE D3D12_GPU_DESCRIPTOR_HANDLE OffsetGPUHandle(D3D12_GPU_DESCRIPTOR_HANDLE handle, u32 count) const - { - D3D12_GPU_DESCRIPTOR_HANDLE ret; - ret.ptr = handle.ptr + m_descriptor_increment_size * count; - return ret; - } - - bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors, bool shader_visible); - void Destroy(); - - bool Allocate(DescriptorHandle* handle, u32 count = 1); - void Free(DescriptorHandle* handle, u32 count = 1); - void Free(u32 
index, u32 count = 1); - -private: - Microsoft::WRL::ComPtr m_descriptor_heap; - u32 m_num_descriptors = 0; - u32 m_descriptor_increment_size = 0; - - D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {}; - D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {}; - - static constexpr u32 BITSET_SIZE = 1024; - using BitSetType = std::bitset; - std::vector m_free_slots = {}; -}; - -} // namespace D3D12 diff --git a/src/common/d3d12/shader_cache.cpp b/src/common/d3d12/shader_cache.cpp deleted file mode 100644 index ce3f878e7..000000000 --- a/src/common/d3d12/shader_cache.cpp +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "shader_cache.h" -#include "../d3d11/shader_compiler.h" -#include "../file_system.h" -#include "../log.h" -#include "../md5_digest.h" -#include -Log_SetChannel(D3D12::ShaderCache); - -namespace D3D12 { - -#pragma pack(push, 1) -struct CacheIndexEntry -{ - u64 source_hash_low; - u64 source_hash_high; - u32 source_length; - u32 shader_type; - u32 file_offset; - u32 blob_size; -}; -#pragma pack(pop) - -ShaderCache::ShaderCache() = default; - -ShaderCache::~ShaderCache() -{ - if (m_pipeline_index_file) - std::fclose(m_pipeline_index_file); - if (m_pipeline_blob_file) - std::fclose(m_pipeline_blob_file); - if (m_shader_index_file) - std::fclose(m_shader_index_file); - if (m_shader_blob_file) - std::fclose(m_shader_blob_file); -} - -bool ShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const -{ - return (source_hash_low == key.source_hash_low && source_hash_high == key.source_hash_high && - source_length == key.source_length && type == key.type); -} - -bool ShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const -{ - return (source_hash_low != key.source_hash_low || source_hash_high != key.source_hash_high || - source_length != key.source_length || type != key.type); -} - -void ShaderCache::Open(std::string_view base_path, 
D3D_FEATURE_LEVEL feature_level, bool debug) -{ - m_base_path = base_path; - m_feature_level = feature_level; - m_debug = debug; - - if (!base_path.empty()) - { - const std::string base_shader_filename = GetCacheBaseFileName(base_path, "shaders", feature_level, debug); - const std::string shader_index_filename = base_shader_filename + ".idx"; - const std::string shader_blob_filename = base_shader_filename + ".bin"; - - if (!ReadExisting(shader_index_filename, shader_blob_filename, m_shader_index_file, m_shader_blob_file, - m_shader_index)) - { - CreateNew(shader_index_filename, shader_blob_filename, m_shader_index_file, m_shader_blob_file); - } - - const std::string base_pipelines_filename = GetCacheBaseFileName(base_path, "pipelines", feature_level, debug); - const std::string pipelines_index_filename = base_pipelines_filename + ".idx"; - const std::string pipelines_blob_filename = base_pipelines_filename + ".bin"; - - if (!ReadExisting(pipelines_index_filename, pipelines_blob_filename, m_pipeline_index_file, m_pipeline_blob_file, - m_pipeline_index)) - { - CreateNew(pipelines_index_filename, pipelines_blob_filename, m_pipeline_index_file, m_pipeline_blob_file); - } - } -} - -void ShaderCache::InvalidatePipelineCache() -{ - m_pipeline_index.clear(); - if (m_pipeline_blob_file) - { - std::fclose(m_pipeline_blob_file); - m_pipeline_blob_file = nullptr; - } - - if (m_pipeline_index_file) - { - std::fclose(m_pipeline_index_file); - m_pipeline_index_file = nullptr; - } - - const std::string base_pipelines_filename = GetCacheBaseFileName(m_base_path, "pipelines", m_feature_level, m_debug); - const std::string pipelines_index_filename = base_pipelines_filename + ".idx"; - const std::string pipelines_blob_filename = base_pipelines_filename + ".bin"; - CreateNew(pipelines_index_filename, pipelines_blob_filename, m_pipeline_index_file, m_pipeline_blob_file); -} - -bool ShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename, std::FILE*& 
index_file, - std::FILE*& blob_file) -{ - if (FileSystem::FileExists(index_filename.c_str())) - { - Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); - FileSystem::DeleteFile(index_filename.c_str()); - } - if (FileSystem::FileExists(blob_filename.c_str())) - { - Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); - FileSystem::DeleteFile(blob_filename.c_str()); - } - - index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); - if (!index_file) - { - Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str()); - return false; - } - - const u32 index_version = FILE_VERSION; - if (std::fwrite(&index_version, sizeof(index_version), 1, index_file) != 1) - { - Log_ErrorPrintf("Failed to write version to index file '%s'", index_filename.c_str()); - std::fclose(index_file); - index_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b"); - if (!blob_file) - { - Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str()); - std::fclose(blob_file); - blob_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - return true; -} - -bool ShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename, - std::FILE*& index_file, std::FILE*& blob_file, CacheIndex& index) -{ - index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b"); - if (!index_file) - return false; - - u32 file_version; - if (std::fread(&file_version, sizeof(file_version), 1, index_file) != 1 || file_version != FILE_VERSION) - { - Log_ErrorPrintf("Bad file version in '%s'", index_filename.c_str()); - std::fclose(index_file); - index_file = nullptr; - return false; - } - - blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b"); - if (!blob_file) - { - Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str()); - 
std::fclose(index_file); - index_file = nullptr; - return false; - } - - std::fseek(blob_file, 0, SEEK_END); - const u32 blob_file_size = static_cast(std::ftell(blob_file)); - - for (;;) - { - CacheIndexEntry entry; - if (std::fread(&entry, sizeof(entry), 1, index_file) != 1 || (entry.file_offset + entry.blob_size) > blob_file_size) - { - if (std::feof(index_file)) - break; - - Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str()); - index.clear(); - std::fclose(blob_file); - blob_file = nullptr; - std::fclose(index_file); - index_file = nullptr; - return false; - } - - const CacheIndexKey key{entry.source_hash_low, entry.source_hash_high, entry.source_length, - static_cast(entry.shader_type)}; - const CacheIndexData data{entry.file_offset, entry.blob_size}; - index.emplace(key, data); - } - - // ensure we don't write before seeking - std::fseek(index_file, 0, SEEK_END); - - Log_InfoPrintf("Read %zu entries from '%s'", index.size(), index_filename.c_str()); - return true; -} - -std::string ShaderCache::GetCacheBaseFileName(const std::string_view& base_path, const std::string_view& type, - D3D_FEATURE_LEVEL feature_level, bool debug) -{ - std::string base_filename(base_path); - base_filename += FS_OSPATH_SEPARATOR_STR "d3d12_"; - base_filename += type; - base_filename += "_"; - - switch (feature_level) - { - case D3D_FEATURE_LEVEL_10_0: - base_filename += "sm40"; - break; - case D3D_FEATURE_LEVEL_10_1: - base_filename += "sm41"; - break; - case D3D_FEATURE_LEVEL_11_0: - base_filename += "sm50"; - break; - default: - base_filename += "unk"; - break; - } - - if (debug) - base_filename += "_debug"; - - return base_filename; -} - -union MD5Hash -{ - struct - { - u64 low; - u64 high; - }; - u8 hash[16]; -}; - -ShaderCache::CacheIndexKey ShaderCache::GetShaderCacheKey(EntryType type, const std::string_view& shader_code) -{ - MD5Hash h; - MD5Digest digest; - digest.Update(shader_code.data(), static_cast(shader_code.length())); - 
digest.Final(h.hash); - - return CacheIndexKey{h.low, h.high, static_cast(shader_code.length()), type}; -} - -ShaderCache::CacheIndexKey ShaderCache::GetPipelineCacheKey(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc) -{ - MD5Digest digest; - u32 length = sizeof(D3D12_GRAPHICS_PIPELINE_STATE_DESC); - - if (gpdesc.VS.BytecodeLength > 0) - { - digest.Update(gpdesc.VS.pShaderBytecode, static_cast(gpdesc.VS.BytecodeLength)); - length += static_cast(gpdesc.VS.BytecodeLength); - } - if (gpdesc.GS.BytecodeLength > 0) - { - digest.Update(gpdesc.GS.pShaderBytecode, static_cast(gpdesc.GS.BytecodeLength)); - length += static_cast(gpdesc.GS.BytecodeLength); - } - if (gpdesc.PS.BytecodeLength > 0) - { - digest.Update(gpdesc.PS.pShaderBytecode, static_cast(gpdesc.PS.BytecodeLength)); - length += static_cast(gpdesc.PS.BytecodeLength); - } - - digest.Update(&gpdesc.BlendState, sizeof(gpdesc.BlendState)); - digest.Update(&gpdesc.SampleMask, sizeof(gpdesc.SampleMask)); - digest.Update(&gpdesc.RasterizerState, sizeof(gpdesc.RasterizerState)); - digest.Update(&gpdesc.DepthStencilState, sizeof(gpdesc.DepthStencilState)); - - for (u32 i = 0; i < gpdesc.InputLayout.NumElements; i++) - { - const D3D12_INPUT_ELEMENT_DESC& ie = gpdesc.InputLayout.pInputElementDescs[i]; - digest.Update(ie.SemanticName, static_cast(std::strlen(ie.SemanticName))); - digest.Update(&ie.SemanticIndex, sizeof(ie.SemanticIndex)); - digest.Update(&ie.Format, sizeof(ie.Format)); - digest.Update(&ie.InputSlot, sizeof(ie.InputSlot)); - digest.Update(&ie.AlignedByteOffset, sizeof(ie.AlignedByteOffset)); - digest.Update(&ie.InputSlotClass, sizeof(ie.InputSlotClass)); - digest.Update(&ie.InstanceDataStepRate, sizeof(ie.InstanceDataStepRate)); - length += sizeof(D3D12_INPUT_ELEMENT_DESC); - } - - digest.Update(&gpdesc.IBStripCutValue, sizeof(gpdesc.IBStripCutValue)); - digest.Update(&gpdesc.PrimitiveTopologyType, sizeof(gpdesc.PrimitiveTopologyType)); - digest.Update(&gpdesc.NumRenderTargets, 
sizeof(gpdesc.NumRenderTargets)); - digest.Update(gpdesc.RTVFormats, sizeof(gpdesc.RTVFormats)); - digest.Update(&gpdesc.DSVFormat, sizeof(gpdesc.DSVFormat)); - digest.Update(&gpdesc.SampleDesc, sizeof(gpdesc.SampleDesc)); - digest.Update(&gpdesc.Flags, sizeof(gpdesc.Flags)); - - MD5Hash h; - digest.Final(h.hash); - - return CacheIndexKey{h.low, h.high, length, EntryType::GraphicsPipeline}; -} - -ShaderCache::ComPtr ShaderCache::GetShaderBlob(EntryType type, std::string_view shader_code) -{ - const auto key = GetShaderCacheKey(type, shader_code); - auto iter = m_shader_index.find(key); - if (iter == m_shader_index.end()) - return CompileAndAddShaderBlob(key, shader_code); - - ComPtr blob; - HRESULT hr = D3DCreateBlob(iter->second.blob_size, blob.GetAddressOf()); - if (FAILED(hr) || std::fseek(m_shader_blob_file, iter->second.file_offset, SEEK_SET) != 0 || - std::fread(blob->GetBufferPointer(), 1, iter->second.blob_size, m_shader_blob_file) != iter->second.blob_size) - { - Log_ErrorPrintf("Read blob from file failed"); - return {}; - } - - return blob; -} - -ShaderCache::ComPtr ShaderCache::GetPipelineState(ID3D12Device* device, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC& desc) -{ - const auto key = GetPipelineCacheKey(desc); - - auto iter = m_pipeline_index.find(key); - if (iter == m_pipeline_index.end()) - return CompileAndAddPipeline(device, key, desc); - - ComPtr blob; - HRESULT hr = D3DCreateBlob(iter->second.blob_size, blob.GetAddressOf()); - if (FAILED(hr) || std::fseek(m_pipeline_blob_file, iter->second.file_offset, SEEK_SET) != 0 || - std::fread(blob->GetBufferPointer(), 1, iter->second.blob_size, m_pipeline_blob_file) != iter->second.blob_size) - { - Log_ErrorPrintf("Read blob from file failed"); - return {}; - } - - D3D12_GRAPHICS_PIPELINE_STATE_DESC desc_with_blob(desc); - desc_with_blob.CachedPSO.pCachedBlob = blob->GetBufferPointer(); - desc_with_blob.CachedPSO.CachedBlobSizeInBytes = blob->GetBufferSize(); - - ComPtr pso; - hr = 
device->CreateGraphicsPipelineState(&desc_with_blob, IID_PPV_ARGS(pso.GetAddressOf())); - if (FAILED(hr)) - { - Log_WarningPrintf("Creating cached PSO failed: %08X. Invalidating cache.", hr); - InvalidatePipelineCache(); - pso = CompileAndAddPipeline(device, key, desc); - } - - return pso; -} - -ShaderCache::ComPtr ShaderCache::CompileAndAddShaderBlob(const CacheIndexKey& key, - std::string_view shader_code) -{ - ComPtr blob; - - switch (key.type) - { - case EntryType::VertexShader: - blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Vertex, m_feature_level, shader_code, - m_debug); - break; - case EntryType::GeometryShader: - blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Geometry, m_feature_level, shader_code, - m_debug); - break; - case EntryType::PixelShader: - blob = - D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Pixel, m_feature_level, shader_code, m_debug); - break; - default: - break; - } - - if (!blob) - return {}; - - if (!m_shader_blob_file || std::fseek(m_shader_blob_file, 0, SEEK_END) != 0) - return blob; - - CacheIndexData data; - data.file_offset = static_cast(std::ftell(m_shader_blob_file)); - data.blob_size = static_cast(blob->GetBufferSize()); - - CacheIndexEntry entry = {}; - entry.source_hash_low = key.source_hash_low; - entry.source_hash_high = key.source_hash_high; - entry.source_length = key.source_length; - entry.shader_type = static_cast(key.type); - entry.blob_size = data.blob_size; - entry.file_offset = data.file_offset; - - if (std::fwrite(blob->GetBufferPointer(), 1, entry.blob_size, m_shader_blob_file) != entry.blob_size || - std::fflush(m_shader_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_shader_index_file) != 1 || - std::fflush(m_shader_index_file) != 0) - { - Log_ErrorPrintf("Failed to write shader blob to file"); - return blob; - } - - m_shader_index.emplace(key, data); - return blob; -} - -ShaderCache::ComPtr 
-ShaderCache::CompileAndAddPipeline(ID3D12Device* device, const CacheIndexKey& key, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc) -{ - ComPtr pso; - HRESULT hr = device->CreateGraphicsPipelineState(&gpdesc, IID_PPV_ARGS(pso.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("Creating cached PSO failed: %08X", hr); - return {}; - } - - if (!m_pipeline_blob_file || std::fseek(m_pipeline_blob_file, 0, SEEK_END) != 0) - return pso; - - ComPtr blob; - hr = pso->GetCachedBlob(blob.GetAddressOf()); - if (FAILED(hr)) - { - Log_WarningPrintf("Failed to get cached PSO data: %08X", hr); - return pso; - } - - CacheIndexData data; - data.file_offset = static_cast(std::ftell(m_pipeline_blob_file)); - data.blob_size = static_cast(blob->GetBufferSize()); - - CacheIndexEntry entry = {}; - entry.source_hash_low = key.source_hash_low; - entry.source_hash_high = key.source_hash_high; - entry.source_length = key.source_length; - entry.shader_type = static_cast(key.type); - entry.blob_size = data.blob_size; - entry.file_offset = data.file_offset; - - if (std::fwrite(blob->GetBufferPointer(), 1, entry.blob_size, m_pipeline_blob_file) != entry.blob_size || - std::fflush(m_pipeline_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_pipeline_index_file) != 1 || - std::fflush(m_pipeline_index_file) != 0) - { - Log_ErrorPrintf("Failed to write pipeline blob to file"); - return pso; - } - - m_shader_index.emplace(key, data); - return pso; -} - -} // namespace D3D12 diff --git a/src/common/d3d12/shader_cache.h b/src/common/d3d12/shader_cache.h deleted file mode 100644 index b5c2e6b1d..000000000 --- a/src/common/d3d12/shader_cache.h +++ /dev/null @@ -1,120 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../hash_combine.h" -#include "../types.h" -#include "../windows_headers.h" -#include -#include -#include -#include -#include -#include - -namespace D3D12 { - -class 
ShaderCache -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - enum class EntryType - { - VertexShader, - GeometryShader, - PixelShader, - ComputeShader, - GraphicsPipeline, - }; - - ShaderCache(); - ~ShaderCache(); - - void Open(std::string_view base_path, D3D_FEATURE_LEVEL feature_level, bool debug); - - ALWAYS_INLINE ComPtr GetVertexShader(std::string_view shader_code) - { - return GetShaderBlob(EntryType::VertexShader, shader_code); - } - ALWAYS_INLINE ComPtr GetGeometryShader(std::string_view shader_code) - { - return GetShaderBlob(EntryType::GeometryShader, shader_code); - } - ALWAYS_INLINE ComPtr GetPixelShader(std::string_view shader_code) - { - return GetShaderBlob(EntryType::PixelShader, shader_code); - } - ALWAYS_INLINE ComPtr GetComputeShader(std::string_view shader_code) - { - return GetShaderBlob(EntryType::ComputeShader, shader_code); - } - - ComPtr GetShaderBlob(EntryType type, std::string_view shader_code); - - ComPtr GetPipelineState(ID3D12Device* device, const D3D12_GRAPHICS_PIPELINE_STATE_DESC& desc); - -private: - static constexpr u32 FILE_VERSION = 1; - - struct CacheIndexKey - { - u64 source_hash_low; - u64 source_hash_high; - u32 source_length; - EntryType type; - - bool operator==(const CacheIndexKey& key) const; - bool operator!=(const CacheIndexKey& key) const; - }; - - struct CacheIndexEntryHasher - { - std::size_t operator()(const CacheIndexKey& e) const noexcept - { - std::size_t h = 0; - hash_combine(h, e.source_hash_low, e.source_hash_high, e.source_length, e.type); - return h; - } - }; - - struct CacheIndexData - { - u32 file_offset; - u32 blob_size; - }; - - using CacheIndex = std::unordered_map; - - static std::string GetCacheBaseFileName(const std::string_view& base_path, const std::string_view& type, - D3D_FEATURE_LEVEL feature_level, bool debug); - static CacheIndexKey GetShaderCacheKey(EntryType type, const std::string_view& shader_code); - static CacheIndexKey GetPipelineCacheKey(const 
D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc); - - bool CreateNew(const std::string& index_filename, const std::string& blob_filename, std::FILE*& index_file, - std::FILE*& blob_file); - bool ReadExisting(const std::string& index_filename, const std::string& blob_filename, std::FILE*& index_file, - std::FILE*& blob_file, CacheIndex& index); - void InvalidatePipelineCache(); - void Close(); - - ComPtr CompileAndAddShaderBlob(const CacheIndexKey& key, std::string_view shader_code); - ComPtr CompileAndAddPipeline(ID3D12Device* device, const CacheIndexKey& key, - const D3D12_GRAPHICS_PIPELINE_STATE_DESC& gpdesc); - - std::string m_base_path; - - std::FILE* m_shader_index_file = nullptr; - std::FILE* m_shader_blob_file = nullptr; - CacheIndex m_shader_index; - - std::FILE* m_pipeline_index_file = nullptr; - std::FILE* m_pipeline_blob_file = nullptr; - CacheIndex m_pipeline_index; - - D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; - bool m_debug = false; -}; - -} // namespace D3D12 diff --git a/src/common/d3d12/staging_texture.cpp b/src/common/d3d12/staging_texture.cpp deleted file mode 100644 index 76e6ffeb3..000000000 --- a/src/common/d3d12/staging_texture.cpp +++ /dev/null @@ -1,239 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "staging_texture.h" -#include "../align.h" -#include "../assert.h" -#include "../log.h" -#include "context.h" -#include "util.h" -Log_SetChannel(D3D12); - -namespace D3D12 { - -StagingTexture::StagingTexture() : m_width(0), m_height(0) {} - -StagingTexture::~StagingTexture() -{ - Destroy(); -} - -bool StagingTexture::Create(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading) -{ - const u32 texel_size = GetTexelSize(format); - const u32 row_pitch = Common::AlignUpPow2(width * texel_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 buffer_size = height * row_pitch; - - const D3D12_HEAP_PROPERTIES heap_properties = {for_uploading ? 
D3D12_HEAP_TYPE_UPLOAD : D3D12_HEAP_TYPE_READBACK}; - - D3D12_RESOURCE_DESC desc = {}; - desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - desc.Width = buffer_size; - desc.Height = 1; - desc.DepthOrArraySize = 1; - desc.MipLevels = 1; - desc.Format = DXGI_FORMAT_UNKNOWN; - desc.SampleDesc.Count = 1; - desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - desc.Flags = D3D12_RESOURCE_FLAG_NONE; - - D3D12_RESOURCE_STATES state = for_uploading ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST; - - ComPtr resource; - HRESULT hr = g_d3d12_context->GetDevice()->CreateCommittedResource( - &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, state, nullptr, IID_PPV_ARGS(resource.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create buffer failed: 0x%08X", hr); - return false; - } - - Destroy(true); - - m_resource = std::move(resource); - m_width = width; - m_height = height; - m_format = format; - m_buffer_size = buffer_size; - m_row_pitch = row_pitch; - m_texel_size = texel_size; - return true; -} - -void StagingTexture::Destroy(bool defer) -{ - if (IsMapped()) - Unmap(); - - if (m_resource && defer) - g_d3d12_context->DeferResourceDestruction(m_resource.Get()); - m_resource.Reset(); - m_width = 0; - m_height = 0; - m_format = DXGI_FORMAT_UNKNOWN; - m_buffer_size = 0; - m_row_pitch = 0; - m_texel_size = 0; -} - -bool StagingTexture::Map(bool writing) -{ - D3D12_RANGE range{0u, m_buffer_size}; - - Assert(!IsMapped()); - const HRESULT hr = m_resource->Map(0, writing ? nullptr : &range, &m_mapped_pointer); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map staging buffer failed: 0x%08X", hr); - return false; - } - - m_mapped_for_write = writing; - return true; -} - -void StagingTexture::Unmap() -{ - Assert(IsMapped()); - - D3D12_RANGE range{0u, m_buffer_size}; - m_resource->Unmap(0, m_mapped_for_write ? 
&range : nullptr); - m_mapped_pointer = nullptr; - m_mapped_for_write = false; -} - -void StagingTexture::Flush() -{ - if (!m_needs_flush) - return; - - m_needs_flush = false; - - // If the completed fence is the same as the current command buffer fence, we need to execute - // the current list and wait for it to complete. This is the slowest path. Otherwise, if the - // command list with the copy has been submitted, we only need to wait for the fence. - if (m_completed_fence == g_d3d12_context->GetCurrentFenceValue()) - g_d3d12_context->ExecuteCommandList(true); - else - g_d3d12_context->WaitForFence(m_completed_fence); -} - -void StagingTexture::CopyToTexture(u32 src_x, u32 src_y, ID3D12Resource* dst_texture, u32 dst_subresource, u32 dst_x, - u32 dst_y, u32 width, u32 height) -{ - DebugAssert((src_x + width) <= m_width && (src_y + height) <= m_height); - - D3D12_TEXTURE_COPY_LOCATION dst; - dst.pResource = dst_texture; - dst.SubresourceIndex = 0; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - D3D12_TEXTURE_COPY_LOCATION src; - src.pResource = m_resource.Get(); - src.SubresourceIndex = 0; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint.Offset = 0; - src.PlacedFootprint.Footprint.Width = m_width; - src.PlacedFootprint.Footprint.Height = m_height; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.Format = m_format; - src.PlacedFootprint.Footprint.RowPitch = m_row_pitch; - - const D3D12_BOX src_box{src_x, src_y, 0u, src_x + width, src_y + height, 1u}; - g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, dst_x, dst_y, 0, &src, &src_box); -} - -void StagingTexture::CopyFromTexture(ID3D12Resource* src_texture, u32 src_subresource, u32 src_x, u32 src_y, u32 dst_x, - u32 dst_y, u32 width, u32 height) -{ - DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); - - D3D12_TEXTURE_COPY_LOCATION src; - src.pResource = src_texture; - src.SubresourceIndex = 0; - src.Type = 
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - D3D12_TEXTURE_COPY_LOCATION dst; - dst.pResource = m_resource.Get(); - dst.SubresourceIndex = 0; - dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst.PlacedFootprint.Offset = 0; - dst.PlacedFootprint.Footprint.Width = m_width; - dst.PlacedFootprint.Footprint.Height = m_height; - dst.PlacedFootprint.Footprint.Depth = 1; - dst.PlacedFootprint.Footprint.Format = m_format; - dst.PlacedFootprint.Footprint.RowPitch = m_row_pitch; - - const D3D12_BOX src_box{src_x, src_y, 0u, src_x + width, src_y + height, 1u}; - g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, dst_x, dst_y, 0, &src, &src_box); - m_completed_fence = g_d3d12_context->GetCurrentFenceValue(); - m_needs_flush = true; -} - -bool StagingTexture::ReadPixels(u32 x, u32 y, u32 width, u32 height, void* data, u32 row_pitch) -{ - if (m_needs_flush) - Flush(); - - const bool was_mapped = IsMapped(); - if (!was_mapped && !Map(false)) - return false; - - const u8* src_ptr = static_cast(m_mapped_pointer) + (y * m_row_pitch) + (x * m_texel_size); - u8* dst_ptr = reinterpret_cast(data); - if (m_row_pitch != row_pitch || width != m_width || x != 0) - { - const u32 copy_size = m_texel_size * width; - for (u32 row = 0; row < height; row++) - { - std::memcpy(dst_ptr, src_ptr, copy_size); - src_ptr += m_row_pitch; - dst_ptr += row_pitch; - } - } - else - { - std::memcpy(dst_ptr, src_ptr, row_pitch * height); - } - - return true; -} - -bool StagingTexture::WritePixels(u32 x, u32 y, u32 width, u32 height, const void* data, u32 row_pitch) -{ - const bool was_mapped = IsMapped(); - if (!was_mapped && !Map(true)) - return false; - - const u8* src_ptr = reinterpret_cast(data); - u8* dst_ptr = static_cast(m_mapped_pointer) + (y * m_row_pitch) + (x * m_texel_size); - if (m_row_pitch != row_pitch || width != m_width || x != 0) - { - const u32 copy_size = m_texel_size * width; - for (u32 row = 0; row < height; row++) - { - std::memcpy(dst_ptr, src_ptr, copy_size); - src_ptr 
+= row_pitch; - dst_ptr += m_row_pitch; - } - } - else - { - std::memcpy(dst_ptr, src_ptr, m_row_pitch * height); - } - - if (!was_mapped) - Unmap(); - - return true; -} - -bool StagingTexture::EnsureSize(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading) -{ - if (m_resource && m_width >= width && m_height >= height && m_format == format) - return true; - - return Create(width, height, format, for_uploading); -} - -} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/staging_texture.h b/src/common/d3d12/staging_texture.h deleted file mode 100644 index 2d97954b4..000000000 --- a/src/common/d3d12/staging_texture.h +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../types.h" -#include "../windows_headers.h" -#include -#include -#include - -namespace D3D12 { -class StagingTexture -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - StagingTexture(); - ~StagingTexture(); - - ALWAYS_INLINE ID3D12Resource* GetD3DResource() const { return m_resource.Get(); } - - ALWAYS_INLINE u32 GetWidth() const { return m_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_height; } - ALWAYS_INLINE DXGI_FORMAT GetFormat() const { return m_format; } - ALWAYS_INLINE bool IsMapped() const { return m_mapped_pointer != nullptr; } - ALWAYS_INLINE const void* GetMapPointer() const { return m_mapped_pointer; } - - ALWAYS_INLINE operator bool() const { return static_cast(m_resource); } - - bool Create(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading); - void Destroy(bool defer = true); - - bool Map(bool writing); - void Unmap(); - void Flush(); - - void CopyToTexture(u32 src_x, u32 src_y, ID3D12Resource* dst_texture, u32 dst_subresource, u32 dst_x, u32 dst_y, - u32 width, u32 height); - void CopyFromTexture(ID3D12Resource* src_texture, u32 src_subresource, u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, - u32 
width, u32 height); - - - bool ReadPixels(u32 x, u32 y, u32 width, u32 height, void* data, u32 row_pitch); - - bool WritePixels(u32 x, u32 y, u32 width, u32 height, const void* data, u32 row_pitch); - - bool EnsureSize(u32 width, u32 height, DXGI_FORMAT format, bool for_uploading); - -protected: - ComPtr m_resource; - u32 m_width; - u32 m_height; - DXGI_FORMAT m_format; - u32 m_texel_size; - u32 m_row_pitch; - u32 m_buffer_size; - - void* m_mapped_pointer = nullptr; - u64 m_completed_fence = 0; - bool m_mapped_for_write = false; - bool m_needs_flush = false; -}; - -} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/texture.cpp b/src/common/d3d12/texture.cpp deleted file mode 100644 index cfa19cdd2..000000000 --- a/src/common/d3d12/texture.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "texture.h" -#include "../align.h" -#include "../assert.h" -#include "../log.h" -#include "context.h" -#include "staging_texture.h" -#include "stream_buffer.h" -#include "util.h" -Log_SetChannel(D3D12); - -static constexpr std::array(GPUTexture::Format::Count)> s_dxgi_mapping = { - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B5G6R5_UNORM, - DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_D16_UNORM}}; - -D3D12::Texture::Texture() = default; - -D3D12::Texture::Texture(ID3D12Resource* resource, D3D12_RESOURCE_STATES state) : m_resource(std::move(resource)) -{ - const D3D12_RESOURCE_DESC desc = GetDesc(); - m_width = static_cast(desc.Width); - m_height = static_cast(desc.Height); - m_layers = static_cast(desc.DepthOrArraySize); - m_levels = static_cast(desc.MipLevels); - m_samples = static_cast(desc.SampleDesc.Count); - m_format = LookupBaseFormat(desc.Format); -} - -D3D12::Texture::Texture(Texture&& texture) - : m_resource(std::move(texture.m_resource)), 
m_srv_descriptor(texture.m_srv_descriptor), - m_rtv_or_dsv_descriptor(texture.m_rtv_or_dsv_descriptor), m_is_depth_view(texture.m_is_depth_view) -{ - m_width = texture.m_width; - m_height = texture.m_height; - m_layers = texture.m_layers; - m_levels = texture.m_levels; - m_samples = texture.m_samples; - texture.m_srv_descriptor = {}; - texture.m_rtv_or_dsv_descriptor = {}; - texture.m_state = D3D12_RESOURCE_STATE_COMMON; - texture.m_is_depth_view = false; - texture.ClearBaseProperties(); -} - -DXGI_FORMAT D3D12::Texture::GetDXGIFormat(Format format) -{ - return s_dxgi_mapping[static_cast(format)]; -} - -GPUTexture::Format D3D12::Texture::LookupBaseFormat(DXGI_FORMAT dformat) -{ - for (u32 i = 0; i < static_cast(s_dxgi_mapping.size()); i++) - { - if (s_dxgi_mapping[i] == dformat) - return static_cast(i); - } - return GPUTexture::Format::Unknown; -} - -D3D12::Texture::~Texture() -{ - Destroy(); -} - -D3D12::Texture& D3D12::Texture::operator=(Texture&& texture) -{ - Destroy(); - - m_width = texture.m_width; - m_height = texture.m_height; - m_layers = texture.m_layers; - m_levels = texture.m_levels; - m_samples = texture.m_samples; - - m_resource = std::move(texture.m_resource); - m_srv_descriptor = texture.m_srv_descriptor; - m_rtv_or_dsv_descriptor = texture.m_rtv_or_dsv_descriptor; - m_state = texture.m_state; - m_is_depth_view = texture.m_is_depth_view; - - texture.ClearBaseProperties(); - texture.m_srv_descriptor = {}; - texture.m_rtv_or_dsv_descriptor = {}; - texture.m_state = D3D12_RESOURCE_STATE_COMMON; - texture.m_is_depth_view = false; - return *this; -} - -D3D12_RESOURCE_DESC D3D12::Texture::GetDesc() const -{ - return m_resource->GetDesc(); -} - -bool D3D12::Texture::IsValid() const -{ - return static_cast(m_resource); -} - -bool D3D12::Texture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format, - DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, DXGI_FORMAT dsv_format, - D3D12_RESOURCE_FLAGS flags) -{ - constexpr 
D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_DEFAULT}; - - if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES) - { - Log_ErrorPrintf("Invalid dimensions: %ux%ux%u %u %u", width, height, layers, levels, samples); - return false; - } - - D3D12_RESOURCE_DESC desc = {}; - desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - desc.Width = width; - desc.Height = static_cast(height); - desc.DepthOrArraySize = static_cast(layers); - desc.MipLevels = static_cast(levels); - desc.Format = format; - desc.SampleDesc.Count = samples; - desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - desc.Flags = flags; - - D3D12_CLEAR_VALUE optimized_clear_value = {}; - D3D12_RESOURCE_STATES state; - if (rtv_format != DXGI_FORMAT_UNKNOWN) - { - optimized_clear_value.Format = rtv_format; - state = D3D12_RESOURCE_STATE_RENDER_TARGET; - } - else if (dsv_format != DXGI_FORMAT_UNKNOWN) - { - optimized_clear_value.Format = dsv_format; - state = D3D12_RESOURCE_STATE_DEPTH_WRITE; - } - else - { - state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; - } - - ComPtr resource; - HRESULT hr = g_d3d12_context->GetDevice()->CreateCommittedResource( - &heap_properties, D3D12_HEAP_FLAG_NONE, &desc, state, - (rtv_format != DXGI_FORMAT_UNKNOWN || dsv_format != DXGI_FORMAT_UNKNOWN) ? 
&optimized_clear_value : nullptr, - IID_PPV_ARGS(resource.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create texture failed: 0x%08X", hr); - return false; - } - - DescriptorHandle srv_descriptor, rtv_descriptor; - bool is_depth_view = false; - if (srv_format != DXGI_FORMAT_UNKNOWN) - { - if (!CreateSRVDescriptor(resource.Get(), srv_format, samples > 1, &srv_descriptor)) - return false; - } - - if (rtv_format != DXGI_FORMAT_UNKNOWN) - { - Assert(dsv_format == DXGI_FORMAT_UNKNOWN); - if (!CreateRTVDescriptor(resource.Get(), rtv_format, samples > 1, &rtv_descriptor)) - { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); - return false; - } - } - else if (dsv_format != DXGI_FORMAT_UNKNOWN) - { - if (!CreateDSVDescriptor(resource.Get(), dsv_format, samples > 1, &rtv_descriptor)) - { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); - return false; - } - - is_depth_view = true; - } - - Destroy(true); - - m_resource = std::move(resource); - m_srv_descriptor = std::move(srv_descriptor); - m_rtv_or_dsv_descriptor = std::move(rtv_descriptor); - m_width = static_cast(width); - m_height = static_cast(height); - m_layers = static_cast(layers); - m_levels = static_cast(levels); - m_samples = static_cast(samples); - m_format = LookupBaseFormat(format); - m_state = state; - m_is_depth_view = is_depth_view; - return true; -} - -bool D3D12::Texture::Adopt(ComPtr texture, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, - DXGI_FORMAT dsv_format, D3D12_RESOURCE_STATES state) -{ - const D3D12_RESOURCE_DESC desc(texture->GetDesc()); - - DescriptorHandle srv_descriptor, rtv_descriptor; - if (srv_format != DXGI_FORMAT_UNKNOWN) - { - if (!CreateSRVDescriptor(texture.Get(), srv_format, desc.SampleDesc.Count > 1, &srv_descriptor)) - return false; - } - - m_is_depth_view = false; - - if (rtv_format != DXGI_FORMAT_UNKNOWN) - { - Assert(dsv_format == DXGI_FORMAT_UNKNOWN); - if (!CreateRTVDescriptor(texture.Get(), rtv_format, 
desc.SampleDesc.Count > 1, &rtv_descriptor)) - { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); - return false; - } - } - else if (dsv_format != DXGI_FORMAT_UNKNOWN) - { - if (!CreateDSVDescriptor(texture.Get(), dsv_format, desc.SampleDesc.Count > 1, &rtv_descriptor)) - { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); - return false; - } - - m_is_depth_view = true; - } - - m_resource = std::move(texture); - m_srv_descriptor = std::move(srv_descriptor); - m_rtv_or_dsv_descriptor = std::move(rtv_descriptor); - m_width = static_cast(desc.Width); - m_height = static_cast(desc.Height); - m_layers = static_cast(desc.DepthOrArraySize); - m_levels = static_cast(desc.MipLevels); - m_samples = static_cast(desc.SampleDesc.Count); - m_format = LookupBaseFormat(desc.Format); - m_state = state; - return true; -} - -void D3D12::Texture::Destroy(bool defer /* = true */) -{ - if (defer) - { - g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDescriptorHeapManager(), &m_srv_descriptor); - if (m_is_depth_view) - g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDSVHeapManager(), &m_rtv_or_dsv_descriptor); - else - g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetRTVHeapManager(), &m_rtv_or_dsv_descriptor); - g_d3d12_context->DeferResourceDestruction(m_resource.Get()); - m_resource.Reset(); - } - else - { - g_d3d12_context->GetDescriptorHeapManager().Free(&m_srv_descriptor); - if (m_is_depth_view) - g_d3d12_context->GetDSVHeapManager().Free(&m_rtv_or_dsv_descriptor); - else - g_d3d12_context->GetRTVHeapManager().Free(&m_rtv_or_dsv_descriptor); - - m_resource.Reset(); - } - - ClearBaseProperties(); - m_is_depth_view = false; -} - -void D3D12::Texture::TransitionToState(D3D12_RESOURCE_STATES state) const -{ - if (m_state == state) - return; - - ResourceBarrier(g_d3d12_context->GetCommandList(), m_resource.Get(), m_state, state); - m_state = state; -} - -bool D3D12::Texture::BeginStreamUpdate(u32 x, 
u32 y, u32 width, u32 height, void** out_data, u32* out_data_pitch) -{ - const u32 copy_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 upload_size = copy_pitch * height; - - if (!g_d3d12_context->GetTextureStreamBuffer().ReserveMemory(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes (%ux%u) in upload buffer", upload_size, width, - height); - g_d3d12_context->ExecuteCommandList(false); - if (!g_d3d12_context->GetTextureStreamBuffer().ReserveMemory(upload_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) - { - Log_ErrorPrintf("Failed to reserve %u bytes for %ux%u upload", upload_size, width, height); - return false; - } - } - - *out_data = g_d3d12_context->GetTextureStreamBuffer().GetCurrentHostPointer(); - *out_data_pitch = copy_pitch; - return true; -} - -void D3D12::Texture::EndStreamUpdate(u32 x, u32 y, u32 width, u32 height) -{ - const u32 copy_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 upload_size = copy_pitch * height; - - StreamBuffer& sb = g_d3d12_context->GetTextureStreamBuffer(); - const u32 sb_offset = sb.GetCurrentOffset(); - sb.CommitMemory(upload_size); - - CopyFromBuffer(x, y, width, height, copy_pitch, sb.GetBuffer(), sb_offset); -} - -void D3D12::Texture::CopyFromBuffer(u32 x, u32 y, u32 width, u32 height, u32 pitch, ID3D12Resource* buffer, - u32 buffer_offset) -{ - D3D12_TEXTURE_COPY_LOCATION src; - src.pResource = buffer; - src.SubresourceIndex = 0; - src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src.PlacedFootprint.Offset = buffer_offset; - src.PlacedFootprint.Footprint.Width = width; - src.PlacedFootprint.Footprint.Height = height; - src.PlacedFootprint.Footprint.Depth = 1; - src.PlacedFootprint.Footprint.RowPitch = pitch; - src.PlacedFootprint.Footprint.Format = GetDXGIFormat(); - - D3D12_TEXTURE_COPY_LOCATION dst; - dst.pResource = m_resource.Get(); 
- dst.SubresourceIndex = 0; - dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - const D3D12_BOX src_box{0u, 0u, 0u, width, height, 1u}; - const D3D12_RESOURCE_STATES old_state = m_state; - TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); - g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, x, y, 0, &src, &src_box); - TransitionToState(old_state); -} - -bool D3D12::Texture::LoadData(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) -{ - const u32 texel_size = GetPixelSize(); - const u32 upload_pitch = Common::AlignUpPow2(width * texel_size, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 upload_size = upload_pitch * height; - if (upload_size >= g_d3d12_context->GetTextureStreamBuffer().GetSize()) - { - StagingTexture st; - if (!st.Create(width, height, GetDXGIFormat(), true) || !st.WritePixels(0, 0, width, height, data, pitch)) - return false; - - D3D12_RESOURCE_STATES old_state = m_state; - TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); - st.CopyToTexture(0, 0, m_resource.Get(), 0, x, y, width, height); - st.Destroy(true); - TransitionToState(old_state); - return true; - } - - void* write_ptr; - u32 write_pitch; - if (!BeginStreamUpdate(x, y, width, height, &write_ptr, &write_pitch)) - return false; - - CopyToUploadBuffer(data, pitch, height, write_ptr, write_pitch); - EndStreamUpdate(x, y, width, height); - return true; -} - -void D3D12::Texture::CopyToUploadBuffer(const void* src_data, u32 src_pitch, u32 height, void* dst_data, u32 dst_pitch) -{ - const u8* src_ptr = static_cast(src_data); - u8* dst_ptr = static_cast(dst_data); - if (src_pitch == dst_pitch) - { - std::memcpy(dst_ptr, src_ptr, dst_pitch * height); - } - else - { - const u32 copy_size = std::min(src_pitch, dst_pitch); - for (u32 row = 0; row < height; row++) - { - std::memcpy(dst_ptr, src_ptr, copy_size); - src_ptr += src_pitch; - dst_ptr += dst_pitch; - } - } -} - -bool D3D12::Texture::CreateSRVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool 
multisampled, - DescriptorHandle* dh) -{ - if (!g_d3d12_context->GetDescriptorHeapManager().Allocate(dh)) - { - Log_ErrorPrintf("Failed to allocate SRV descriptor"); - return false; - } - - D3D12_SHADER_RESOURCE_VIEW_DESC desc = { - format, multisampled ? D3D12_SRV_DIMENSION_TEXTURE2DMS : D3D12_SRV_DIMENSION_TEXTURE2D, - D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; - if (!multisampled) - desc.Texture2D.MipLevels = 1; - - g_d3d12_context->GetDevice()->CreateShaderResourceView(resource, &desc, dh->cpu_handle); - return true; -} - -bool D3D12::Texture::CreateRTVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, - DescriptorHandle* dh) -{ - if (!g_d3d12_context->GetRTVHeapManager().Allocate(dh)) - { - Log_ErrorPrintf("Failed to allocate SRV descriptor"); - return false; - } - - D3D12_RENDER_TARGET_VIEW_DESC desc = {format, - multisampled ? D3D12_RTV_DIMENSION_TEXTURE2DMS : D3D12_RTV_DIMENSION_TEXTURE2D}; - - g_d3d12_context->GetDevice()->CreateRenderTargetView(resource, &desc, dh->cpu_handle); - return true; -} - -bool D3D12::Texture::CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, - DescriptorHandle* dh) -{ - if (!g_d3d12_context->GetDSVHeapManager().Allocate(dh)) - { - Log_ErrorPrintf("Failed to allocate SRV descriptor"); - return false; - } - - D3D12_DEPTH_STENCIL_VIEW_DESC desc = { - format, multisampled ? 
D3D12_DSV_DIMENSION_TEXTURE2DMS : D3D12_DSV_DIMENSION_TEXTURE2D, D3D12_DSV_FLAG_NONE}; - - g_d3d12_context->GetDevice()->CreateDepthStencilView(resource, &desc, dh->cpu_handle); - return true; -} diff --git a/src/common/d3d12/texture.h b/src/common/d3d12/texture.h deleted file mode 100644 index d43078469..000000000 --- a/src/common/d3d12/texture.h +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../gpu_texture.h" -#include "../windows_headers.h" -#include "descriptor_heap_manager.h" -#include -#include - -namespace D3D12 { - -class StreamBuffer; - -class Texture final : public GPUTexture -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - Texture(); - Texture(ID3D12Resource* resource, D3D12_RESOURCE_STATES state); - Texture(Texture&& texture); - Texture(const Texture&) = delete; - ~Texture(); - - static DXGI_FORMAT GetDXGIFormat(Format format); - static Format LookupBaseFormat(DXGI_FORMAT dformat); - - ALWAYS_INLINE ID3D12Resource* GetResource() const { return m_resource.Get(); } - ALWAYS_INLINE const DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } - ALWAYS_INLINE const DescriptorHandle& GetRTVOrDSVDescriptor() const { return m_rtv_or_dsv_descriptor; } - ALWAYS_INLINE D3D12_RESOURCE_STATES GetState() const { return m_state; } - ALWAYS_INLINE DXGI_FORMAT GetDXGIFormat() const { return GetDXGIFormat(m_format); } - - ALWAYS_INLINE operator ID3D12Resource*() const { return m_resource.Get(); } - ALWAYS_INLINE operator bool() const { return static_cast(m_resource); } - - bool IsValid() const override; - - bool Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, - DXGI_FORMAT dsv_format, D3D12_RESOURCE_FLAGS flags); - bool Adopt(ComPtr texture, DXGI_FORMAT srv_format, DXGI_FORMAT rtv_format, DXGI_FORMAT dsv_format, - 
D3D12_RESOURCE_STATES state); - - D3D12_RESOURCE_DESC GetDesc() const; - - void Destroy(bool defer = true); - - void TransitionToState(D3D12_RESOURCE_STATES state) const; - - Texture& operator=(const Texture&) = delete; - Texture& operator=(Texture&& texture); - - bool BeginStreamUpdate(u32 x, u32 y, u32 width, u32 height, void** out_data, u32* out_data_pitch); - void EndStreamUpdate(u32 x, u32 y, u32 width, u32 height); - - bool LoadData(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch); - - static void CopyToUploadBuffer(const void* src_data, u32 src_pitch, u32 height, void* dst_data, u32 dst_pitch); - void CopyFromBuffer(u32 x, u32 y, u32 width, u32 height, u32 pitch, ID3D12Resource* buffer, u32 buffer_offset); - -private: - static bool CreateSRVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, - DescriptorHandle* dh); - static bool CreateRTVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, - DescriptorHandle* dh); - static bool CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, bool multisampled, - DescriptorHandle* dh); - - ComPtr m_resource; - DescriptorHandle m_srv_descriptor = {}; - DescriptorHandle m_rtv_or_dsv_descriptor = {}; - - mutable D3D12_RESOURCE_STATES m_state = D3D12_RESOURCE_STATE_COMMON; - - bool m_is_depth_view = false; -}; - -} // namespace D3D12 \ No newline at end of file diff --git a/src/common/d3d12/util.cpp b/src/common/d3d12/util.cpp deleted file mode 100644 index 607a980b1..000000000 --- a/src/common/d3d12/util.cpp +++ /dev/null @@ -1,392 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "util.h" -#include "../assert.h" -#include "../log.h" -#include "../string.h" -#include "../string_util.h" -#include "context.h" -#include "shader_cache.h" -#include -#include -Log_SetChannel(D3D12); - -namespace D3D12 { - -void ResourceBarrier(ID3D12GraphicsCommandList* cmdlist, 
ID3D12Resource* resource, D3D12_RESOURCE_STATES from_state, - D3D12_RESOURCE_STATES to_state) -{ - const D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, - D3D12_RESOURCE_BARRIER_FLAG_NONE, - {{resource, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, from_state, to_state}}}; - cmdlist->ResourceBarrier(1, &barrier); -} - -void SetViewport(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, float min_depth /*= 0.0f*/, - float max_depth /*= 1.0f*/) -{ - const D3D12_VIEWPORT vp{static_cast(x), - static_cast(y), - static_cast(width), - static_cast(height), - min_depth, - max_depth}; - cmdlist->RSSetViewports(1, &vp); -} - -void SetScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height) -{ - const D3D12_RECT r{x, y, x + width, y + height}; - cmdlist->RSSetScissorRects(1, &r); -} - -void SetViewportAndScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, - float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) -{ - SetViewport(cmdlist, x, y, width, height, min_depth, max_depth); - SetScissor(cmdlist, x, y, width, height); -} - -void SetViewportAndClampScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, - float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) -{ - SetViewport(cmdlist, x, y, width, height, min_depth, max_depth); - - const int cx = std::max(x, 0); - const int cy = std::max(y, 0); - const int cwidth = width - (cx - x); - const int cheight = height - (cy - y); - SetScissor(cmdlist, cx, cy, cwidth, cheight); -} - -u32 GetTexelSize(DXGI_FORMAT format) -{ - switch (format) - { - case DXGI_FORMAT_R8G8B8A8_UNORM: - case DXGI_FORMAT_R8G8B8A8_SNORM: - case DXGI_FORMAT_R8G8B8A8_TYPELESS: - case DXGI_FORMAT_B8G8R8A8_UNORM: - case DXGI_FORMAT_B8G8R8A8_TYPELESS: - return 4; - - case DXGI_FORMAT_B5G5R5A1_UNORM: - case DXGI_FORMAT_B5G6R5_UNORM: - return 2; - - default: - Panic("Unknown format"); - return 1; - } -} - -void 
SetDefaultSampler(D3D12_SAMPLER_DESC* desc) -{ - desc->Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; - desc->AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - desc->AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - desc->AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - desc->MipLODBias = 0; - desc->MaxAnisotropy = 1; - desc->ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; - desc->BorderColor[0] = 1.0f; - desc->BorderColor[1] = 1.0f; - desc->BorderColor[2] = 1.0f; - desc->BorderColor[3] = 1.0f; - desc->MinLOD = -3.402823466e+38F; // -FLT_MAX - desc->MaxLOD = 3.402823466e+38F; // FLT_MAX -} - -#ifdef _DEBUG - -void SetObjectName(ID3D12Object* object, const char* name) -{ - object->SetName(StringUtil::UTF8StringToWideString(name).c_str()); -} - -void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...) -{ - std::va_list ap; - va_start(ap, format); - - SmallString str; - str.FormatVA(format, ap); - - SetObjectName(object, str); - va_end(ap); -} - -#endif - -GraphicsPipelineBuilder::GraphicsPipelineBuilder() -{ - Clear(); -} - -void GraphicsPipelineBuilder::Clear() -{ - std::memset(&m_desc, 0, sizeof(m_desc)); - std::memset(m_input_elements.data(), 0, sizeof(D3D12_INPUT_ELEMENT_DESC) * m_input_elements.size()); - m_desc.NodeMask = 1; - m_desc.SampleMask = 0xFFFFFFFF; - m_desc.SampleDesc.Count = 1; -} - -Microsoft::WRL::ComPtr GraphicsPipelineBuilder::Create(ID3D12Device* device, bool clear /*= true*/) -{ - Microsoft::WRL::ComPtr ps; - HRESULT hr = device->CreateGraphicsPipelineState(&m_desc, IID_PPV_ARGS(ps.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateGraphicsPipelineState() failed: %08X", hr); - return {}; - } - - if (clear) - Clear(); - - return ps; -} - -Microsoft::WRL::ComPtr GraphicsPipelineBuilder::Create(ID3D12Device* device, ShaderCache& cache, - bool clear /*= true*/) -{ - Microsoft::WRL::ComPtr pso = cache.GetPipelineState(device, m_desc); - if (!pso) - return {}; - - if (clear) - Clear(); - - return pso; -} - -void 
GraphicsPipelineBuilder::SetRootSignature(ID3D12RootSignature* rs) -{ - m_desc.pRootSignature = rs; -} - -void GraphicsPipelineBuilder::SetVertexShader(ID3DBlob* blob) -{ - SetVertexShader(blob->GetBufferPointer(), static_cast(blob->GetBufferSize())); -} - -void GraphicsPipelineBuilder::SetVertexShader(const void* data, u32 data_size) -{ - m_desc.VS.pShaderBytecode = data; - m_desc.VS.BytecodeLength = data_size; -} - -void GraphicsPipelineBuilder::SetGeometryShader(ID3DBlob* blob) -{ - SetGeometryShader(blob->GetBufferPointer(), static_cast(blob->GetBufferSize())); -} - -void GraphicsPipelineBuilder::SetGeometryShader(const void* data, u32 data_size) -{ - m_desc.GS.pShaderBytecode = data; - m_desc.GS.BytecodeLength = data_size; -} - -void GraphicsPipelineBuilder::SetPixelShader(ID3DBlob* blob) -{ - SetPixelShader(blob->GetBufferPointer(), static_cast(blob->GetBufferSize())); -} - -void GraphicsPipelineBuilder::SetPixelShader(const void* data, u32 data_size) -{ - m_desc.PS.pShaderBytecode = data; - m_desc.PS.BytecodeLength = data_size; -} - -void GraphicsPipelineBuilder::AddVertexAttribute(const char* semantic_name, u32 semantic_index, DXGI_FORMAT format, - u32 buffer, u32 offset) -{ - const u32 index = m_desc.InputLayout.NumElements; - m_input_elements[index].SemanticIndex = semantic_index; - m_input_elements[index].SemanticName = semantic_name; - m_input_elements[index].Format = format; - m_input_elements[index].AlignedByteOffset = offset; - m_input_elements[index].InputSlot = buffer; - m_input_elements[index].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - m_input_elements[index].InstanceDataStepRate = 0; - - m_desc.InputLayout.pInputElementDescs = m_input_elements.data(); - m_desc.InputLayout.NumElements++; -} - -void GraphicsPipelineBuilder::SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE type) -{ - m_desc.PrimitiveTopologyType = type; -} - -void GraphicsPipelineBuilder::SetRasterizationState(D3D12_FILL_MODE polygon_mode, D3D12_CULL_MODE 
cull_mode, - bool front_face_ccw) -{ - m_desc.RasterizerState.FillMode = polygon_mode; - m_desc.RasterizerState.CullMode = cull_mode; - m_desc.RasterizerState.FrontCounterClockwise = front_face_ccw; -} - -void GraphicsPipelineBuilder::SetMultisamples(u32 multisamples) -{ - m_desc.RasterizerState.MultisampleEnable = multisamples > 1; - m_desc.SampleDesc.Count = multisamples; -} - -void GraphicsPipelineBuilder::SetNoCullRasterizationState() -{ - SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); -} - -void GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op) -{ - m_desc.DepthStencilState.DepthEnable = depth_test; - m_desc.DepthStencilState.DepthWriteMask = depth_write ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; - m_desc.DepthStencilState.DepthFunc = compare_op; -} - -void GraphicsPipelineBuilder::SetNoDepthTestState() -{ - SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); -} - -void GraphicsPipelineBuilder::SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, D3D12_BLEND dst_factor, - D3D12_BLEND_OP op, D3D12_BLEND alpha_src_factor, - D3D12_BLEND alpha_dst_factor, D3D12_BLEND_OP alpha_op, - u8 write_mask /*= 0xFF*/) -{ - m_desc.BlendState.RenderTarget[rt].BlendEnable = blend_enable; - m_desc.BlendState.RenderTarget[rt].SrcBlend = src_factor; - m_desc.BlendState.RenderTarget[rt].DestBlend = dst_factor; - m_desc.BlendState.RenderTarget[rt].BlendOp = op; - m_desc.BlendState.RenderTarget[rt].SrcBlendAlpha = alpha_src_factor; - m_desc.BlendState.RenderTarget[rt].DestBlendAlpha = alpha_dst_factor; - m_desc.BlendState.RenderTarget[rt].BlendOpAlpha = alpha_op; - m_desc.BlendState.RenderTarget[rt].RenderTargetWriteMask = write_mask; - - if (rt > 0) - m_desc.BlendState.IndependentBlendEnable = TRUE; -} - -void GraphicsPipelineBuilder::SetNoBlendingState() -{ - SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, 
D3D12_BLEND_ZERO, - D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); - m_desc.BlendState.IndependentBlendEnable = FALSE; -} - -void GraphicsPipelineBuilder::ClearRenderTargets() -{ - m_desc.NumRenderTargets = 0; - for (u32 i = 0; i < sizeof(m_desc.RTVFormats) / sizeof(m_desc.RTVFormats[0]); i++) - m_desc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; -} - -void GraphicsPipelineBuilder::SetRenderTarget(u32 rt, DXGI_FORMAT format) -{ - m_desc.RTVFormats[rt] = format; - if (rt >= m_desc.NumRenderTargets) - m_desc.NumRenderTargets = rt + 1; -} - -void GraphicsPipelineBuilder::ClearDepthStencilFormat() -{ - m_desc.DSVFormat = DXGI_FORMAT_UNKNOWN; -} - -void GraphicsPipelineBuilder::SetDepthStencilFormat(DXGI_FORMAT format) -{ - m_desc.DSVFormat = format; -} - -RootSignatureBuilder::RootSignatureBuilder() -{ - Clear(); -} - -void RootSignatureBuilder::Clear() -{ - m_desc = {}; - m_desc.pParameters = m_params.data(); - m_params = {}; - m_descriptor_ranges = {}; - m_num_descriptor_ranges = 0; -} - -Microsoft::WRL::ComPtr RootSignatureBuilder::Create(bool clear /*= true*/) -{ - Microsoft::WRL::ComPtr rs = g_d3d12_context->CreateRootSignature(&m_desc); - if (!rs) - return {}; - - if (clear) - Clear(); - - return rs; -} - -void RootSignatureBuilder::SetInputAssemblerFlag() -{ - m_desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; -} - -u32 RootSignatureBuilder::Add32BitConstants(u32 shader_reg, u32 num_values, D3D12_SHADER_VISIBILITY visibility) -{ - const u32 index = m_desc.NumParameters++; - - m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - m_params[index].ShaderVisibility = visibility; - m_params[index].Constants.ShaderRegister = shader_reg; - m_params[index].Constants.RegisterSpace = 0; - m_params[index].Constants.Num32BitValues = num_values; - - return index; -} - -u32 RootSignatureBuilder::AddCBVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) -{ - const u32 index = m_desc.NumParameters++; - - 
m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; - m_params[index].ShaderVisibility = visibility; - m_params[index].Descriptor.ShaderRegister = shader_reg; - m_params[index].Descriptor.RegisterSpace = 0; - - return index; -} - -u32 RootSignatureBuilder::AddSRVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) -{ - const u32 index = m_desc.NumParameters++; - - m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; - m_params[index].ShaderVisibility = visibility; - m_params[index].Descriptor.ShaderRegister = shader_reg; - m_params[index].Descriptor.RegisterSpace = 0; - - return index; -} - -u32 RootSignatureBuilder::AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE rt, u32 start_shader_reg, u32 num_shader_regs, - D3D12_SHADER_VISIBILITY visibility) -{ - const u32 index = m_desc.NumParameters++; - const u32 dr_index = m_num_descriptor_ranges++; - - m_descriptor_ranges[dr_index].RangeType = rt; - m_descriptor_ranges[dr_index].NumDescriptors = num_shader_regs; - m_descriptor_ranges[dr_index].BaseShaderRegister = start_shader_reg; - m_descriptor_ranges[dr_index].RegisterSpace = 0; - m_descriptor_ranges[dr_index].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; - - m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - m_params[index].DescriptorTable.pDescriptorRanges = &m_descriptor_ranges[dr_index]; - m_params[index].DescriptorTable.NumDescriptorRanges = 1; - m_params[index].ShaderVisibility = visibility; - - return index; -} - -} // namespace D3D12 \ No newline at end of file diff --git a/src/common/drm_display.cpp b/src/common/drm_display.cpp deleted file mode 100644 index be9da8f6e..000000000 --- a/src/common/drm_display.cpp +++ /dev/null @@ -1,388 +0,0 @@ -#include "drm_display.h" -#include "common/assert.h" -#include "common/log.h" -#include "common/scoped_guard.h" -#include "common/string.h" -#include "file_system.h" -#include -#include -#include -#include -Log_SetChannel(DRMDisplay); - 
-enum -{ - MAX_CARDS_TO_TRY = 10 -}; - -DRMDisplay::DRMDisplay(int card /*= 1*/) : m_card_id(card) {} - -DRMDisplay::~DRMDisplay() -{ - // restore original buffer - if (m_prev_crtc) - RestoreBuffer(); - - if (m_connector) - drmModeFreeConnector(m_connector); - - if (m_card_fd >= 0) - close(m_card_fd); -} - -// https://gist.github.com/Miouyouyou/89e9fe56a2c59bce7d4a18a858f389ef - -static uint32_t find_crtc_for_encoder(const drmModeRes* resources, const drmModeEncoder* encoder) -{ - int i; - - for (i = 0; i < resources->count_crtcs; i++) - { - /* possible_crtcs is a bitmask as described here: - * https://dvdhrm.wordpress.com/2012/09/13/linux-drm-mode-setting-api - */ - const uint32_t crtc_mask = 1 << i; - const uint32_t crtc_id = resources->crtcs[i]; - if (encoder->possible_crtcs & crtc_mask) - { - return crtc_id; - } - } - - /* no match found */ - return -1; -} - -static uint32_t find_crtc_for_connector(int card_fd, const drmModeRes* resources, const drmModeConnector* connector) -{ - int i; - - for (i = 0; i < connector->count_encoders; i++) - { - const uint32_t encoder_id = connector->encoders[i]; - drmModeEncoder* encoder = drmModeGetEncoder(card_fd, encoder_id); - - if (encoder) - { - const uint32_t crtc_id = find_crtc_for_encoder(resources, encoder); - - drmModeFreeEncoder(encoder); - if (crtc_id != 0) - { - return crtc_id; - } - } - } - - /* no match found */ - return -1; -} - -bool DRMDisplay::Initialize(u32 width, u32 height, float refresh_rate) -{ - if (m_card_id < 0) - { - for (int i = 0; i < MAX_CARDS_TO_TRY; i++) - { - if (TryOpeningCard(i, width, height, refresh_rate)) - return true; - } - - return false; - } - - return TryOpeningCard(m_card_id, width, height, refresh_rate); -} - -void DRMDisplay::RestoreBuffer() -{ - if (m_prev_crtc) - { - u32 connector_id = m_connector->connector_id; - drmModeSetCrtc(m_card_fd, m_prev_crtc->crtc_id, m_prev_crtc->buffer_id, m_prev_crtc->x, m_prev_crtc->y, - &connector_id, 1, &m_prev_crtc->mode); - 
drmModeFreeCrtc(m_prev_crtc); - m_prev_crtc = nullptr; - } -} - -bool DRMDisplay::TryOpeningCard(int card, u32 width, u32 height, float refresh_rate) -{ - if (m_card_fd >= 0) - close(m_card_fd); - - m_card_fd = open(TinyString::FromFormat("/dev/dri/card%d", card), O_RDWR); - if (m_card_fd < 0) - { - Log_ErrorPrintf("open(/dev/dri/card%d) failed: %d (%s)", card, errno, strerror(errno)); - return false; - } - - drmModeRes* resources = drmModeGetResources(m_card_fd); - if (!resources) - { - Log_ErrorPrintf("drmModeGetResources() failed: %d (%s)", errno, strerror(errno)); - return false; - } - - Assert(!m_connector); - - for (int i = 0; i < resources->count_connectors; i++) - { - drmModeConnector* next_connector = drmModeGetConnector(m_card_fd, resources->connectors[i]); - if (next_connector->connection == DRM_MODE_CONNECTED) - { - m_connector = next_connector; - break; - } - - drmModeFreeConnector(next_connector); - } - - if (!m_connector) - { - Log_ErrorPrintf("No connector found"); - drmModeFreeResources(resources); - return false; - } - - for (int i = 0; i < m_connector->count_modes; i++) - { - drmModeModeInfo* next_mode = &m_connector->modes[i]; - - const float mode_refresh_rate = (static_cast(next_mode->clock) * 1000.0f) / - (static_cast(next_mode->htotal) * static_cast(next_mode->vtotal)); - Log_DevPrintf("Checking mode %u: %ux%u @ %f hz", i, next_mode->hdisplay, next_mode->vdisplay, mode_refresh_rate); - - if (width == 0 && height == 0) - { - // use preferred mode if we're auto selecting - if (next_mode->type & DRM_MODE_TYPE_PREFERRED) - { - m_mode = next_mode; - break; - } - else if (!m_mode) - { - m_mode = next_mode; - } - } - else - { - if (width == next_mode->hdisplay && height == next_mode->vdisplay && - (refresh_rate == 0.0f || std::abs(mode_refresh_rate - refresh_rate) < 0.1f)) - { - m_mode = next_mode; - break; - } - } - } - - if (!m_mode) - { - Log_ErrorPrintf("No mode found"); - drmModeFreeResources(resources); - return false; - } - - drmModeEncoder* 
encoder = nullptr; - for (int i = 0; i < resources->count_encoders; i++) - { - drmModeEncoder* next_encoder = drmModeGetEncoder(m_card_fd, resources->encoders[i]); - if (next_encoder->encoder_id == m_connector->encoder_id) - { - encoder = next_encoder; - m_crtc_id = encoder->crtc_id; - break; - } - - drmModeFreeEncoder(next_encoder); - } - - if (encoder) - { - drmModeFreeEncoder(encoder); - } - else - { - m_crtc_id = find_crtc_for_connector(m_card_fd, resources, m_connector); - if (m_crtc_id == 0) - { - Log_ErrorPrintf("No CRTC found"); - drmModeFreeResources(resources); - return false; - } - } - - drmModeFreeResources(resources); - - m_card_id = card; - m_prev_crtc = drmModeGetCrtc(m_card_fd, m_crtc_id); - return true; -} - -std::optional DRMDisplay::AddBuffer(u32 width, u32 height, u32 format, u32 handle, u32 pitch, u32 offset) -{ - uint32_t bo_handles[4] = {handle, 0, 0, 0}; - uint32_t pitches[4] = {pitch, 0, 0, 0}; - uint32_t offsets[4] = {offset, 0, 0, 0}; - - u32 fb_id; - int res = drmModeAddFB2(m_card_fd, width, height, format, bo_handles, pitches, offsets, &fb_id, 0); - if (res != 0) - { - Log_ErrorPrintf("drmModeAddFB2() failed: %d", res); - return std::nullopt; - } - - return fb_id; -} - -void DRMDisplay::RemoveBuffer(u32 fb_id) -{ - drmModeRmFB(m_card_fd, fb_id); -} - -void DRMDisplay::PresentBuffer(u32 fb_id, bool wait_for_vsync) -{ - if (!wait_for_vsync) - { - u32 connector_id = m_connector->connector_id; - int res = drmModeSetCrtc(m_card_fd, m_crtc_id, fb_id, 0, 0, &connector_id, 1, m_mode); - if (res != 0) - Log_ErrorPrintf("drmModeSetCrtc() failed: %d", res); - - return; - } - - bool waiting_for_flip = true; - drmEventContext event_ctx = {}; - event_ctx.version = DRM_EVENT_CONTEXT_VERSION; - event_ctx.page_flip_handler = [](int fd, unsigned int frame, unsigned int sec, unsigned int usec, void* data) { - *reinterpret_cast(data) = false; - }; - - int res = drmModePageFlip(m_card_fd, m_crtc_id, fb_id, DRM_MODE_PAGE_FLIP_EVENT, &waiting_for_flip); - if 
(res != 0) - { - Log_ErrorPrintf("drmModePageFlip() failed: %d", res); - return; - } - - while (waiting_for_flip) - { - fd_set fds; - FD_ZERO(&fds); - FD_SET(m_card_fd, &fds); - int res = select(m_card_fd + 1, &fds, nullptr, nullptr, nullptr); - if (res < 0) - { - Log_ErrorPrintf("select() failed: %d", errno); - break; - } - else if (res == 0) - { - continue; - } - - drmHandleEvent(m_card_fd, &event_ctx); - } -} - -bool DRMDisplay::GetCurrentMode(u32* width, u32* height, float* refresh_rate, int card, int connector) -{ - int card_fd = -1; - if (card < 0) - { - for (int try_card = 0; try_card < MAX_CARDS_TO_TRY; try_card++) - { - card_fd = open(TinyString::FromFormat("/dev/dri/card%d", try_card), O_RDWR); - if (card_fd >= 0) - break; - } - } - else - { - card_fd = open(TinyString::FromFormat("/dev/dri/card%d", card), O_RDWR); - } - - if (card_fd < 0) - { - Log_ErrorPrintf("open(/dev/dri/card%d) failed: %d (%s)", card, errno, strerror(errno)); - return false; - } - - ScopedGuard card_guard([card_fd]() { close(card_fd); }); - - drmModeRes* resources = drmModeGetResources(card_fd); - if (!resources) - { - Log_ErrorPrintf("drmModeGetResources() failed: %d (%s)", errno, strerror(errno)); - return false; - } - - ScopedGuard resources_guard([resources]() { drmModeFreeResources(resources); }); - drmModeConnector* connector_ptr = nullptr; - if (connector < 0) - { - for (int i = 0; i < resources->count_connectors; i++) - { - connector_ptr = drmModeGetConnector(card_fd, resources->connectors[i]); - if (connector_ptr->connection == DRM_MODE_CONNECTED) - break; - - drmModeFreeConnector(connector_ptr); - } - } - else if (connector < resources->count_connectors) - { - connector_ptr = drmModeGetConnector(card_fd, resources->connectors[connector]); - } - - ScopedGuard connector_guard([connector_ptr]() { - if (connector_ptr) - drmModeFreeConnector(connector_ptr); - }); - if (!connector_ptr || connector_ptr->connection != DRM_MODE_CONNECTED) - { - Log_ErrorPrintf("No connector 
found"); - return false; - } - - drmModeEncoder* encoder = drmModeGetEncoder(card_fd, connector_ptr->encoder_id); - if (!encoder) - { - Log_ErrorPrint("No encoder found"); - return false; - } - - ScopedGuard encoder_guard([encoder]() { drmModeFreeEncoder(encoder); }); - - drmModeCrtc* crtc = drmModeGetCrtc(card_fd, encoder->crtc_id); - if (!crtc) - { - Log_ErrorPrint("No CRTC found"); - return false; - } - - if (!crtc->mode_valid) - { - Log_ErrorPrint("CRTC mode not valid"); - return false; - } - - const u32 current_width = static_cast(crtc->mode.hdisplay); - const u32 current_height = static_cast(crtc->mode.vdisplay); - const float current_refresh_rate = (static_cast(crtc->mode.clock) * 1000.0f) / - (static_cast(crtc->mode.htotal) * static_cast(crtc->mode.vtotal)); - Log_InfoPrintf("Current mode for card %d: %ux%u@%f", card, current_width, current_height, current_refresh_rate); - - if (width) - *width = current_width; - if (height) - *height = current_height; - if (refresh_rate) - *refresh_rate = current_refresh_rate; - - return true; -} diff --git a/src/common/drm_display.h b/src/common/drm_display.h deleted file mode 100644 index da1ac56be..000000000 --- a/src/common/drm_display.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once -#include "types.h" -#include -#include -#include -#include - -class DRMDisplay -{ -public: - DRMDisplay(int card = -1); - ~DRMDisplay(); - - static bool GetCurrentMode(u32* width, u32* height, float* refresh_rate, int card = -1, int connector = -1); - - bool Initialize(u32 width, u32 height, float refresh_rate); - - /// Restores the buffer saved at startup. 
- void RestoreBuffer(); - - int GetCardID() const { return m_card_id; } - int GetCardFD() const { return m_card_fd; } - u32 GetWidth() const { return m_mode->hdisplay; } - u32 GetHeight() const { return m_mode->vdisplay; } - float GetRefreshRate() const - { - return (static_cast(m_mode->clock) * 1000.0f) / - (static_cast(m_mode->htotal) * static_cast(m_mode->vtotal)); - } - - u32 GetModeCount() const { return m_connector->count_modes; } - u32 GetModeWidth(u32 i) const { return m_connector->modes[i].hdisplay; } - u32 GetModeHeight(u32 i) const { return m_connector->modes[i].vdisplay; } - float GetModeRefreshRate(u32 i) const - { - return (static_cast(m_connector->modes[i].clock) * 1000.0f) / - (static_cast(m_connector->modes[i].htotal) * static_cast(m_connector->modes[i].vtotal)); - } - - std::optional AddBuffer(u32 width, u32 height, u32 format, u32 handle, u32 pitch, u32 offset); - void RemoveBuffer(u32 fb_id); - void PresentBuffer(u32 fb_id, bool wait_for_vsync); - -private: - enum : u32 - { - MAX_BUFFERS = 5 - }; - - bool TryOpeningCard(int card, u32 width, u32 height, float refresh_rate); - - int m_card_id = 0; - int m_card_fd = -1; - u32 m_crtc_id = 0; - - drmModeRes* m_resources = nullptr; - drmModeConnector* m_connector = nullptr; - drmModeModeInfo* m_mode = nullptr; - - drmModeCrtc* m_prev_crtc = nullptr; -}; diff --git a/src/common/gl/context_egl_android.cpp b/src/common/gl/context_egl_android.cpp deleted file mode 100644 index 6ca861f52..000000000 --- a/src/common/gl/context_egl_android.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "context_egl_android.h" -#include "../log.h" -#include -Log_SetChannel(GL::ContextEGLAndroid); - -namespace GL { -ContextEGLAndroid::ContextEGLAndroid(const WindowInfo& wi) : ContextEGL(wi) {} -ContextEGLAndroid::~ContextEGLAndroid() = default; - -std::unique_ptr ContextEGLAndroid::Create(const WindowInfo& wi, 
const Version* versions_to_try, - size_t num_versions_to_try) -{ - std::unique_ptr context = std::make_unique(wi); - if (!context->Initialize(versions_to_try, num_versions_to_try)) - return nullptr; - - return context; -} - -std::unique_ptr ContextEGLAndroid::CreateSharedContext(const WindowInfo& wi) -{ - std::unique_ptr context = std::make_unique(wi); - context->m_display = m_display; - - if (!context->CreateContextAndSurface(m_version, m_context, false)) - return nullptr; - - return context; -} - -EGLNativeWindowType ContextEGLAndroid::GetNativeWindow(EGLConfig config) -{ - EGLint native_visual_id = 0; - if (!eglGetConfigAttrib(m_display, m_config, EGL_NATIVE_VISUAL_ID, &native_visual_id)) - { - Log_ErrorPrintf("Failed to get native visual ID"); - return 0; - } - - ANativeWindow_setBuffersGeometry(static_cast(m_wi.window_handle), 0, 0, static_cast(native_visual_id)); - m_wi.surface_width = ANativeWindow_getWidth(static_cast(m_wi.window_handle)); - m_wi.surface_height = ANativeWindow_getHeight(static_cast(m_wi.window_handle)); - return static_cast(m_wi.window_handle); -} -} // namespace GL diff --git a/src/common/gl/context_egl_android.h b/src/common/gl/context_egl_android.h deleted file mode 100644 index c8241e8ea..000000000 --- a/src/common/gl/context_egl_android.h +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "context_egl.h" - -namespace GL { - -class ContextEGLAndroid final : public ContextEGL -{ -public: - ContextEGLAndroid(const WindowInfo& wi); - ~ContextEGLAndroid() override; - - static std::unique_ptr Create(const WindowInfo& wi, const Version* versions_to_try, - size_t num_versions_to_try); - - std::unique_ptr CreateSharedContext(const WindowInfo& wi) override; - -protected: - EGLNativeWindowType GetNativeWindow(EGLConfig config) override; -}; - -} // namespace GL diff --git a/src/common/gl/context_egl_fbdev.cpp 
b/src/common/gl/context_egl_fbdev.cpp deleted file mode 100644 index b69fc58bc..000000000 --- a/src/common/gl/context_egl_fbdev.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "context_egl_fbdev.h" - -namespace GL { -ContextEGLFBDev::ContextEGLFBDev(const WindowInfo& wi) : ContextEGL(wi) {} -ContextEGLFBDev::~ContextEGLFBDev() = default; - -std::unique_ptr ContextEGLFBDev::Create(const WindowInfo& wi, const Version* versions_to_try, - size_t num_versions_to_try) -{ - std::unique_ptr context = std::make_unique(wi); - if (!context->Initialize(versions_to_try, num_versions_to_try)) - return nullptr; - - return context; -} - -std::unique_ptr ContextEGLFBDev::CreateSharedContext(const WindowInfo& wi) -{ - std::unique_ptr context = std::make_unique(wi); - context->m_display = m_display; - - if (!context->CreateContextAndSurface(m_version, m_context, false)) - return nullptr; - - return context; -} - -EGLNativeWindowType ContextEGLFBDev::GetNativeWindow(EGLConfig config) -{ - return static_cast(0); -} -} // namespace GL diff --git a/src/common/gl/context_egl_fbdev.h b/src/common/gl/context_egl_fbdev.h deleted file mode 100644 index 35f5b8cf4..000000000 --- a/src/common/gl/context_egl_fbdev.h +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "context_egl.h" - -namespace GL { - -class ContextEGLFBDev final : public ContextEGL -{ -public: - ContextEGLFBDev(const WindowInfo& wi); - ~ContextEGLFBDev() override; - - static std::unique_ptr Create(const WindowInfo& wi, const Version* versions_to_try, - size_t num_versions_to_try); - - std::unique_ptr CreateSharedContext(const WindowInfo& wi) override; - -protected: - EGLNativeWindowType GetNativeWindow(EGLConfig config) override; -}; - -} // namespace GL diff --git a/src/common/gl/context_egl_gbm.cpp 
b/src/common/gl/context_egl_gbm.cpp deleted file mode 100644 index 2f8c7de70..000000000 --- a/src/common/gl/context_egl_gbm.cpp +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "context_egl_gbm.h" -#include "../assert.h" -#include "../log.h" -#include -#include -#include -Log_SetChannel(GL::ContextEGLGBM); - -namespace GL { -ContextEGLGBM::ContextEGLGBM(const WindowInfo& wi) : ContextEGL(wi) -{ -#ifdef CONTEXT_EGL_GBM_USE_PRESENT_THREAD - StartPresentThread(); -#endif -} - -ContextEGLGBM::~ContextEGLGBM() -{ -#ifdef CONTEXT_EGL_GBM_USE_PRESENT_THREAD - StopPresentThread(); - Assert(!m_current_present_buffer); -#endif - - m_drm_display.RestoreBuffer(); - - // We have to destroy the context before the surface/device. - // Leaving it to the base class would be too late. - DestroySurface(); - DestroyContext(); - - while (m_num_buffers > 0) - { - Buffer& buffer = m_buffers[--m_num_buffers]; - m_drm_display.RemoveBuffer(buffer.fb_id); - } - - if (m_fb_surface) - gbm_surface_destroy(m_fb_surface); - - if (m_gbm_device) - gbm_device_destroy(m_gbm_device); -} - -std::unique_ptr ContextEGLGBM::Create(const WindowInfo& wi, const Version* versions_to_try, - size_t num_versions_to_try) -{ - std::unique_ptr context = std::make_unique(wi); - if (!context->CreateDisplay() || !context->CreateGBMDevice() || - !context->Initialize(versions_to_try, num_versions_to_try)) - { - return nullptr; - } - - return context; -} - -std::unique_ptr ContextEGLGBM::CreateSharedContext(const WindowInfo& wi) -{ - std::unique_ptr context = std::make_unique(wi); - context->m_display = m_display; - - if (!context->CreateContextAndSurface(m_version, m_context, false)) - return nullptr; - - return context; -} - -void ContextEGLGBM::ResizeSurface(u32 new_surface_width, u32 new_surface_height) -{ - ContextEGL::ResizeSurface(new_surface_width, new_surface_height); -} - -bool 
ContextEGLGBM::CreateGBMDevice() -{ - Assert(!m_gbm_device); - m_gbm_device = gbm_create_device(m_drm_display.GetCardFD()); - if (!m_gbm_device) - { - Log_ErrorPrintf("gbm_create_device() failed: %d", errno); - return false; - } - - return true; -} - -bool ContextEGLGBM::CreateDisplay() -{ - if (!m_drm_display.Initialize(m_wi.surface_width, m_wi.surface_height, m_wi.surface_refresh_rate)) - return false; - - m_wi.surface_width = m_drm_display.GetWidth(); - m_wi.surface_height = m_drm_display.GetHeight(); - m_wi.surface_refresh_rate = m_drm_display.GetRefreshRate(); - return true; -} - -bool ContextEGLGBM::SetDisplay() -{ - if (!eglGetPlatformDisplayEXT) - { - Log_ErrorPrintf("eglGetPlatformDisplayEXT() not loaded"); - return false; - } - - m_display = eglGetPlatformDisplayEXT(EGL_PLATFORM_GBM_KHR, m_gbm_device, nullptr); - if (!m_display) - { - Log_ErrorPrintf("eglGetPlatformDisplayEXT() failed"); - return false; - } - - return true; -} - -EGLNativeWindowType ContextEGLGBM::GetNativeWindow(EGLConfig config) -{ - EGLint visual_id; - eglGetConfigAttrib(m_display, config, EGL_NATIVE_VISUAL_ID, &visual_id); - - Assert(!m_fb_surface); - m_fb_surface = gbm_surface_create(m_gbm_device, m_drm_display.GetWidth(), m_drm_display.GetHeight(), - static_cast(visual_id), GBM_BO_USE_RENDERING | GBM_BO_USE_SCANOUT); - if (!m_fb_surface) - { - Log_ErrorPrintf("gbm_surface_create() failed: %d", errno); - return {}; - } - - return (EGLNativeWindowType)((void*)m_fb_surface); -} - -ContextEGLGBM::Buffer* ContextEGLGBM::LockFrontBuffer() -{ - struct gbm_bo* bo = gbm_surface_lock_front_buffer(m_fb_surface); - - Buffer* buffer = nullptr; - for (u32 i = 0; i < m_num_buffers; i++) - { - if (m_buffers[i].bo == bo) - { - buffer = &m_buffers[i]; - break; - } - } - - if (!buffer) - { - // haven't tracked this buffer yet - Assert(m_num_buffers < MAX_BUFFERS); - - const u32 width = gbm_bo_get_width(bo); - const u32 height = gbm_bo_get_height(bo); - const u32 stride = gbm_bo_get_stride(bo); - const 
u32 format = gbm_bo_get_format(bo); - const u32 handle = gbm_bo_get_handle(bo).u32; - - std::optional fb_id = m_drm_display.AddBuffer(width, height, format, handle, stride, 0); - if (!fb_id.has_value()) - return nullptr; - - buffer = &m_buffers[m_num_buffers]; - buffer->bo = bo; - buffer->fb_id = fb_id.value(); - m_num_buffers++; - } - - return buffer; -} - -void ContextEGLGBM::ReleaseBuffer(Buffer* buffer) -{ - gbm_surface_release_buffer(m_fb_surface, buffer->bo); -} - -void ContextEGLGBM::PresentBuffer(Buffer* buffer, bool wait_for_vsync) -{ - m_drm_display.PresentBuffer(buffer->fb_id, wait_for_vsync); -} - -bool ContextEGLGBM::SwapBuffers() -{ - if (!ContextEGL::SwapBuffers()) - return false; - -#ifdef CONTEXT_EGL_GBM_USE_PRESENT_THREAD - std::unique_lock lock(m_present_mutex); - m_present_pending.store(true); - m_present_cv.notify_one(); - if (m_vsync) - m_present_done_cv.wait(lock, [this]() { return !m_present_pending.load(); }); -#else - Buffer* front_buffer = LockFrontBuffer(); - if (!front_buffer) - return false; - - PresentSurface(front_buffer, m_vsync && m_last_front_buffer); - - if (m_last_front_buffer) - ReleaseBuffer(m_last_front_buffer); - - m_last_front_buffer = front_buffer; -#endif - - return true; -} - -bool ContextEGLGBM::SetSwapInterval(s32 interval) -{ - if (interval < 0 || interval > 1) - return false; - - std::unique_lock lock(m_present_mutex); - m_vsync = (interval > 0); - return true; -} - -std::vector ContextEGLGBM::EnumerateFullscreenModes() -{ - std::vector modes; - modes.reserve(m_drm_display.GetModeCount()); - for (u32 i = 0; i < m_drm_display.GetModeCount(); i++) - { - modes.push_back(FullscreenModeInfo{m_drm_display.GetModeWidth(i), m_drm_display.GetModeHeight(i), - m_drm_display.GetModeRefreshRate(i)}); - } - return modes; -} - -#ifdef CONTEXT_EGL_GBM_USE_PRESENT_THREAD - -void ContextEGLGBM::StartPresentThread() -{ - m_present_thread_shutdown.store(false); - m_present_thread = std::thread(&ContextEGLGBM::PresentThread, this); -} - 
-void ContextEGLGBM::StopPresentThread() -{ - if (!m_present_thread.joinable()) - return; - - { - std::unique_lock lock(m_present_mutex); - m_present_thread_shutdown.store(true); - m_present_cv.notify_one(); - } - - m_present_thread.join(); -} - -void ContextEGLGBM::PresentThread() -{ - std::unique_lock lock(m_present_mutex); - - while (!m_present_thread_shutdown.load()) - { - m_present_cv.wait(lock); - - if (!m_present_pending.load()) - continue; - - Buffer* next_buffer = LockFrontBuffer(); - const bool wait_for_vsync = m_vsync && m_current_present_buffer; - - lock.unlock(); - PresentBuffer(next_buffer, wait_for_vsync); - lock.lock(); - - if (m_current_present_buffer) - ReleaseBuffer(m_current_present_buffer); - - m_current_present_buffer = next_buffer; - m_present_pending.store(false); - m_present_done_cv.notify_one(); - } - - if (m_current_present_buffer) - { - ReleaseBuffer(m_current_present_buffer); - m_current_present_buffer = nullptr; - } -} - -#endif - -} // namespace GL diff --git a/src/common/gl/context_egl_gbm.h b/src/common/gl/context_egl_gbm.h deleted file mode 100644 index 0bc0d8aef..000000000 --- a/src/common/gl/context_egl_gbm.h +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../drm_display.h" -#include "context_egl.h" -#include -#include -#include -#include -#include - -#define CONTEXT_EGL_GBM_USE_PRESENT_THREAD 1 - -namespace GL { - -class ContextEGLGBM final : public ContextEGL -{ -public: - ContextEGLGBM(const WindowInfo& wi); - ~ContextEGLGBM() override; - - static std::unique_ptr Create(const WindowInfo& wi, const Version* versions_to_try, - size_t num_versions_to_try); - - std::unique_ptr CreateSharedContext(const WindowInfo& wi) override; - void ResizeSurface(u32 new_surface_width = 0, u32 new_surface_height = 0) override; - - bool SwapBuffers() override; - bool SetSwapInterval(s32 interval) override; - - std::vector 
EnumerateFullscreenModes() override; - -protected: - bool SetDisplay() override; - EGLNativeWindowType GetNativeWindow(EGLConfig config) override; - -private: - enum : u32 - { - MAX_BUFFERS = 5 - }; - - struct Buffer - { - struct gbm_bo* bo; - u32 fb_id; - }; - - bool CreateDisplay(); - - bool CreateGBMDevice(); - Buffer* LockFrontBuffer(); - void ReleaseBuffer(Buffer* buffer); - void PresentBuffer(Buffer* buffer, bool wait_for_vsync); - - void StartPresentThread(); - void StopPresentThread(); - void PresentThread(); - - DRMDisplay m_drm_display; - struct gbm_device* m_gbm_device = nullptr; - struct gbm_surface* m_fb_surface = nullptr; - bool m_vsync = true; - -#ifdef CONTEXT_EGL_GBM_USE_PRESENT_THREAD - std::thread m_present_thread; - std::mutex m_present_mutex; - std::condition_variable m_present_cv; - std::atomic_bool m_present_pending{false}; - std::atomic_bool m_present_thread_shutdown{false}; - std::condition_variable m_present_done_cv; - - Buffer* m_current_present_buffer = nullptr; -#endif - - u32 m_num_buffers = 0; - std::array m_buffers{}; -}; - -} // namespace GL diff --git a/src/common/gl/context_glx.cpp b/src/common/gl/context_glx.cpp deleted file mode 100644 index 568e715c4..000000000 --- a/src/common/gl/context_glx.cpp +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "context_glx.h" -#include "../assert.h" -#include "../log.h" -#include -Log_SetChannel(GL::ContextGLX); - -namespace GL { -ContextGLX::ContextGLX(const WindowInfo& wi) : Context(wi) {} - -ContextGLX::~ContextGLX() -{ - if (glXGetCurrentContext() == m_context) - glXMakeContextCurrent(GetDisplay(), None, None, None); - - if (m_context) - glXDestroyContext(GetDisplay(), m_context); - - if (m_vi) - XFree(m_vi); - - if (m_libGL_handle) - dlclose(m_libGL_handle); -} - -std::unique_ptr ContextGLX::Create(const WindowInfo& wi, const Version* versions_to_try, - size_t num_versions_to_try) 
-{ - std::unique_ptr context = std::make_unique(wi); - if (!context->Initialize(versions_to_try, num_versions_to_try)) - return nullptr; - - return context; -} - -bool ContextGLX::Initialize(const Version* versions_to_try, size_t num_versions_to_try) -{ - // We need libGL loaded, because GLAD loads its own, then releases it. - m_libGL_handle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL); - if (!m_libGL_handle) - { - m_libGL_handle = dlopen("libGL.so", RTLD_NOW | RTLD_GLOBAL); - if (!m_libGL_handle) - { - Log_ErrorPrintf("Failed to load libGL.so: %s", dlerror()); - return false; - } - } - - const int screen = DefaultScreen(GetDisplay()); - if (!gladLoadGLX(GetDisplay(), screen)) - { - Log_ErrorPrintf("Loading GLAD GLX functions failed"); - return false; - } - - if (m_wi.type == WindowInfo::Type::X11) - { - if (!CreateWindow(screen)) - return false; - } - - for (size_t i = 0; i < num_versions_to_try; i++) - { - const Version& cv = versions_to_try[i]; - if (cv.profile == Profile::NoProfile && CreateAnyContext(nullptr, true)) - { - m_version = cv; - return true; - } - else if (cv.profile != Profile::NoProfile && CreateVersionContext(cv, nullptr, true)) - { - m_version = cv; - return true; - } - } - - return false; -} - -void* ContextGLX::GetProcAddress(const char* name) -{ - return reinterpret_cast(glXGetProcAddress(reinterpret_cast(name))); -} - -bool ContextGLX::ChangeSurface(const WindowInfo& new_wi) -{ - const bool was_current = (glXGetCurrentContext() == m_context); - if (was_current) - glXMakeContextCurrent(GetDisplay(), None, None, None); - - m_window.Destroy(); - m_wi = new_wi; - - if (new_wi.type == WindowInfo::Type::X11) - { - const int screen = DefaultScreen(GetDisplay()); - if (!CreateWindow(screen)) - return false; - } - - if (was_current && !glXMakeContextCurrent(GetDisplay(), GetDrawable(), GetDrawable(), m_context)) - { - Log_ErrorPrintf("Failed to make context current again after surface change"); - return false; - } - - return true; -} - -void 
ContextGLX::ResizeSurface(u32 new_surface_width /*= 0*/, u32 new_surface_height /*= 0*/) -{ - m_window.Resize(new_surface_width, new_surface_height); - m_wi.surface_width = m_window.GetWidth(); - m_wi.surface_height = m_window.GetHeight(); -} - -bool ContextGLX::SwapBuffers() -{ - glXSwapBuffers(GetDisplay(), GetDrawable()); - return true; -} - -bool ContextGLX::IsCurrent() -{ - return (m_context && glXGetCurrentContext() == m_context); -} - -bool ContextGLX::MakeCurrent() -{ - return (glXMakeContextCurrent(GetDisplay(), GetDrawable(), GetDrawable(), m_context) == True); -} - -bool ContextGLX::DoneCurrent() -{ - return (glXMakeContextCurrent(GetDisplay(), None, None, None) == True); -} - -bool ContextGLX::SetSwapInterval(s32 interval) -{ - if (GLAD_GLX_EXT_swap_control) - { - glXSwapIntervalEXT(GetDisplay(), GetDrawable(), interval); - return true; - } - else if (GLAD_GLX_MESA_swap_control) - { - return (glXSwapIntervalMESA(static_cast(std::max(interval, 0))) != 0); - } - else if (GLAD_GLX_SGI_swap_control) - { - return (glXSwapIntervalSGI(interval) != 0); - } - else - { - return false; - } -} - -std::unique_ptr ContextGLX::CreateSharedContext(const WindowInfo& wi) -{ - std::unique_ptr context = std::make_unique(wi); - if (wi.type == WindowInfo::Type::X11) - { - const int screen = DefaultScreen(context->GetDisplay()); - if (!context->CreateWindow(screen)) - return nullptr; - } - else - { - Panic("Create pbuffer"); - } - - if (m_version.profile == Profile::NoProfile) - { - if (!context->CreateAnyContext(m_context, false)) - return nullptr; - } - else - { - if (!context->CreateVersionContext(m_version, m_context, false)) - return nullptr; - } - - context->m_version = m_version; - return context; -} - -bool ContextGLX::CreateWindow(int screen) -{ - int attribs[32] = {GLX_X_RENDERABLE, True, GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT, - GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, GLX_DOUBLEBUFFER, True}; - int nattribs = 8; - - switch (m_wi.surface_format) - { - case 
WindowInfo::SurfaceFormat::RGB8: - attribs[nattribs++] = GLX_RED_SIZE; - attribs[nattribs++] = 8; - attribs[nattribs++] = GLX_GREEN_SIZE; - attribs[nattribs++] = 8; - attribs[nattribs++] = GLX_BLUE_SIZE; - attribs[nattribs++] = 8; - break; - - case WindowInfo::SurfaceFormat::RGBA8: - attribs[nattribs++] = GLX_RED_SIZE; - attribs[nattribs++] = 8; - attribs[nattribs++] = GLX_GREEN_SIZE; - attribs[nattribs++] = 8; - attribs[nattribs++] = GLX_BLUE_SIZE; - attribs[nattribs++] = 8; - attribs[nattribs++] = GLX_ALPHA_SIZE; - attribs[nattribs++] = 8; - break; - - case WindowInfo::SurfaceFormat::RGB565: - attribs[nattribs++] = GLX_RED_SIZE; - attribs[nattribs++] = 5; - attribs[nattribs++] = GLX_GREEN_SIZE; - attribs[nattribs++] = 6; - attribs[nattribs++] = GLX_BLUE_SIZE; - attribs[nattribs++] = 5; - break; - - case WindowInfo::SurfaceFormat::Auto: - break; - - default: - UnreachableCode(); - break; - } - - attribs[nattribs++] = None; - attribs[nattribs++] = 0; - - int fbcount = 0; - GLXFBConfig* fbc = glXChooseFBConfig(GetDisplay(), screen, attribs, &fbcount); - if (!fbc || !fbcount) - { - Log_ErrorPrintf("glXChooseFBConfig() failed"); - return false; - } - m_fb_config = *fbc; - XFree(fbc); - - if (!GLAD_GLX_VERSION_1_3) - { - Log_ErrorPrintf("GLX Version 1.3 is required"); - return false; - } - - m_vi = glXGetVisualFromFBConfig(GetDisplay(), m_fb_config); - if (!m_vi) - { - Log_ErrorPrintf("glXGetVisualFromFBConfig() failed"); - return false; - } - - return m_window.Create(GetDisplay(), static_cast(reinterpret_cast(m_wi.window_handle)), m_vi); -} - -bool ContextGLX::CreateAnyContext(GLXContext share_context, bool make_current) -{ - X11InhibitErrors ie; - - m_context = glXCreateContext(GetDisplay(), m_vi, share_context, True); - if (!m_context || ie.HadError()) - { - Log_ErrorPrintf("glxCreateContext() failed"); - return false; - } - - if (make_current) - { - if (!glXMakeCurrent(GetDisplay(), GetDrawable(), m_context)) - { - Log_ErrorPrintf("glXMakeCurrent() failed"); - 
return false; - } - } - - return true; -} - -bool ContextGLX::CreateVersionContext(const Version& version, GLXContext share_context, bool make_current) -{ - // we need create context attribs - if (!GLAD_GLX_VERSION_1_3) - { - Log_ErrorPrint("Missing GLX version 1.3."); - return false; - } - - int attribs[32]; - int nattribs = 0; - attribs[nattribs++] = GLX_CONTEXT_PROFILE_MASK_ARB; - attribs[nattribs++] = - ((version.profile == Profile::ES) ? - ((version.major_version >= 2) ? GLX_CONTEXT_ES2_PROFILE_BIT_EXT : GLX_CONTEXT_ES_PROFILE_BIT_EXT) : - GLX_CONTEXT_CORE_PROFILE_BIT_ARB); - attribs[nattribs++] = GLX_CONTEXT_MAJOR_VERSION_ARB; - attribs[nattribs++] = version.major_version; - attribs[nattribs++] = GLX_CONTEXT_MINOR_VERSION_ARB; - attribs[nattribs++] = version.minor_version; - attribs[nattribs++] = None; - attribs[nattribs++] = 0; - - X11InhibitErrors ie; - m_context = glXCreateContextAttribsARB(GetDisplay(), m_fb_config, share_context, True, attribs); - XSync(GetDisplay(), False); - if (ie.HadError()) - m_context = nullptr; - if (!m_context) - return false; - - if (make_current) - { - if (!glXMakeContextCurrent(GetDisplay(), GetDrawable(), GetDrawable(), m_context)) - { - Log_ErrorPrint("glXMakeContextCurrent() failed"); - glXDestroyContext(GetDisplay(), m_context); - m_context = nullptr; - return false; - } - } - - return true; -} -} // namespace GL diff --git a/src/common/gl/context_glx.h b/src/common/gl/context_glx.h deleted file mode 100644 index 5ed48a9e3..000000000 --- a/src/common/gl/context_glx.h +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "context.h" -#include "glad_glx.h" -#include "x11_window.h" - -namespace GL { - -class ContextGLX final : public Context -{ -public: - ContextGLX(const WindowInfo& wi); - ~ContextGLX() override; - - static std::unique_ptr Create(const WindowInfo& wi, const Version* versions_to_try, - size_t 
num_versions_to_try); - - void* GetProcAddress(const char* name) override; - bool ChangeSurface(const WindowInfo& new_wi) override; - void ResizeSurface(u32 new_surface_width = 0, u32 new_surface_height = 0) override; - bool SwapBuffers() override; - bool IsCurrent() override; - bool MakeCurrent() override; - bool DoneCurrent() override; - bool SetSwapInterval(s32 interval) override; - std::unique_ptr CreateSharedContext(const WindowInfo& wi) override; - -private: - ALWAYS_INLINE Display* GetDisplay() const { return static_cast(m_wi.display_connection); } - ALWAYS_INLINE GLXDrawable GetDrawable() const { return static_cast(m_window.GetWindow()); } - - bool Initialize(const Version* versions_to_try, size_t num_versions_to_try); - bool CreateWindow(int screen); - bool CreateAnyContext(GLXContext share_context, bool make_current); - bool CreateVersionContext(const Version& version, GLXContext share_context, bool make_current); - - GLXContext m_context = nullptr; - GLXFBConfig m_fb_config = {}; - XVisualInfo* m_vi = nullptr; - X11Window m_window; - - // GLAD releases its reference to libGL.so, so we need to maintain our own. 
- void* m_libGL_handle = nullptr; -}; - -} // namespace GL diff --git a/src/common/gl/program.cpp b/src/common/gl/program.cpp deleted file mode 100644 index 204260eee..000000000 --- a/src/common/gl/program.cpp +++ /dev/null @@ -1,622 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "program.h" -#include "../assert.h" -#include "../log.h" -#include "../string_util.h" -#include -#include -Log_SetChannel(GL); - -namespace GL { - -GLuint Program::s_last_program_id = 0; -static GLuint s_next_bad_shader_id = 1; - -Program::Program() = default; - -Program::Program(Program&& prog) -{ - m_program_id = prog.m_program_id; - prog.m_program_id = 0; - m_vertex_shader_id = prog.m_vertex_shader_id; - prog.m_vertex_shader_id = 0; - m_fragment_shader_id = prog.m_fragment_shader_id; - prog.m_fragment_shader_id = 0; - m_uniform_locations = std::move(prog.m_uniform_locations); -} - -Program::~Program() -{ - Destroy(); -} - -GLuint Program::CompileShader(GLenum type, const std::string_view source) -{ - GLuint id = glCreateShader(type); - - std::array sources = {{source.data()}}; - std::array source_lengths = {{static_cast(source.size())}}; - glShaderSource(id, static_cast(sources.size()), sources.data(), source_lengths.data()); - glCompileShader(id); - - GLint status = GL_FALSE; - glGetShaderiv(id, GL_COMPILE_STATUS, &status); - - GLint info_log_length = 0; - glGetShaderiv(id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (status == GL_FALSE || info_log_length > 0) - { - std::string info_log; - info_log.resize(info_log_length + 1); - glGetShaderInfoLog(id, info_log_length, &info_log_length, &info_log[0]); - - if (status == GL_TRUE) - { - Log_ErrorPrintf("Shader compiled with warnings:\n%s", info_log.c_str()); - } - else - { - Log_ErrorPrintf("Shader failed to compile:\n%s", info_log.c_str()); - - std::ofstream ofs(StringUtil::StdStringFromFormat("bad_shader_%u.txt", s_next_bad_shader_id++).c_str(), - 
std::ofstream::out | std::ofstream::binary); - if (ofs.is_open()) - { - ofs.write(sources[0], source_lengths[0]); - ofs << "\n\nCompile failed, info log:\n"; - ofs << info_log; - ofs.close(); - } - - glDeleteShader(id); - return 0; - } - } - - return id; -} - -void Program::ResetLastProgram() -{ - s_last_program_id = 0; -} - -bool Program::Compile(const std::string_view vertex_shader,const std::string_view fragment_shader) -{ - if (m_vertex_shader_id != 0) - { - glDeleteShader(m_vertex_shader_id); - m_vertex_shader_id = 0; - } - if (m_fragment_shader_id != 0) - { - glDeleteShader(m_fragment_shader_id); - m_fragment_shader_id = 0; - } - - if (!vertex_shader.empty()) - { - m_vertex_shader_id = CompileShader(GL_VERTEX_SHADER, vertex_shader); - if (m_vertex_shader_id == 0) - return false; - } - - if (!fragment_shader.empty()) - { - m_fragment_shader_id = CompileShader(GL_FRAGMENT_SHADER, fragment_shader); - if (m_fragment_shader_id == 0) - { - glDeleteShader(m_fragment_shader_id); - return false; - } - } - - m_program_id = glCreateProgram(); - if (m_vertex_shader_id != 0) - glAttachShader(m_program_id, m_vertex_shader_id); - if (m_fragment_shader_id != 0) - glAttachShader(m_program_id, m_fragment_shader_id); - return true; -} - -bool Program::CreateFromBinary(const void* data, u32 data_length, u32 data_format) -{ - GLuint prog = glCreateProgram(); - glProgramBinary(prog, static_cast(data_format), data, data_length); - - GLint link_status; - glGetProgramiv(prog, GL_LINK_STATUS, &link_status); - if (link_status != GL_TRUE) - { - Log_ErrorPrintf("Failed to create GL program from binary: status %d", link_status); - glDeleteProgram(prog); - return false; - } - - m_program_id = prog; - return true; -} - -bool Program::GetBinary(std::vector* out_data, u32* out_data_format) -{ - GLint binary_size = 0; - glGetProgramiv(m_program_id, GL_PROGRAM_BINARY_LENGTH, &binary_size); - if (binary_size == 0) - { - Log_WarningPrint("glGetProgramiv(GL_PROGRAM_BINARY_LENGTH) returned 0"); - 
return false; - } - - GLenum format = 0; - out_data->resize(static_cast(binary_size)); - glGetProgramBinary(m_program_id, binary_size, &binary_size, &format, out_data->data()); - if (binary_size == 0) - { - Log_WarningPrint("glGetProgramBinary() failed"); - return false; - } - else if (static_cast(binary_size) != out_data->size()) - { - Log_WarningPrintf("Size changed from %zu to %d after glGetProgramBinary()", out_data->size(), binary_size); - out_data->resize(static_cast(binary_size)); - } - - *out_data_format = static_cast(format); - Log_DevPrintf("Program binary retrieved, %zu bytes, format %u", out_data->size(), *out_data_format); - return true; -} - -void Program::SetBinaryRetrievableHint() -{ - glProgramParameteri(m_program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); -} - -void Program::BindAttribute(GLuint index, const char* name) -{ - glBindAttribLocation(m_program_id, index, name); -} - -void Program::BindDefaultAttributes() -{ - BindAttribute(0, "a_position"); - BindAttribute(1, "a_texcoord"); - BindAttribute(2, "a_color"); -} - -void Program::BindFragData(GLuint index /*= 0*/, const char* name /*= "o_col0"*/) -{ - glBindFragDataLocation(m_program_id, index, name); -} - -void Program::BindFragDataIndexed(GLuint color_number /*= 0*/, const char* name /*= "o_col0"*/) -{ - if (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended) - { - glBindFragDataLocationIndexed(m_program_id, color_number, 0, name); - return; - } - else if (GLAD_GL_EXT_blend_func_extended) - { - glBindFragDataLocationIndexedEXT(m_program_id, color_number, 0, name); - return; - } - - Log_ErrorPrintf("BindFragDataIndexed() called without ARB or EXT extension, we'll probably crash."); - glBindFragDataLocationIndexed(m_program_id, color_number, 0, name); -} - -bool Program::Link() -{ - glLinkProgram(m_program_id); - - if (m_vertex_shader_id != 0) - glDeleteShader(m_vertex_shader_id); - m_vertex_shader_id = 0; - if (m_fragment_shader_id != 0) - glDeleteShader(m_fragment_shader_id); - 
m_fragment_shader_id = 0; - - GLint status = GL_FALSE; - glGetProgramiv(m_program_id, GL_LINK_STATUS, &status); - - GLint info_log_length = 0; - glGetProgramiv(m_program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (status == GL_FALSE || info_log_length > 0) - { - std::string info_log; - info_log.resize(info_log_length + 1); - glGetProgramInfoLog(m_program_id, info_log_length, &info_log_length, &info_log[0]); - - if (status == GL_TRUE) - { - Log_ErrorPrintf("Program linked with warnings:\n%s", info_log.c_str()); - } - else - { - Log_ErrorPrintf("Program failed to link:\n%s", info_log.c_str()); - glDeleteProgram(m_program_id); - m_program_id = 0; - return false; - } - } - - return true; -} - -void Program::Bind() const -{ - if (s_last_program_id == m_program_id) - return; - - glUseProgram(m_program_id); - s_last_program_id = m_program_id; -} - -void Program::Destroy() -{ - if (m_vertex_shader_id != 0) - { - glDeleteShader(m_vertex_shader_id); - m_vertex_shader_id = 0; - } - if (m_fragment_shader_id != 0) - { - glDeleteShader(m_fragment_shader_id); - m_fragment_shader_id = 0; - } - if (m_program_id != 0) - { - glDeleteProgram(m_program_id); - m_program_id = 0; - } - - m_uniform_locations.clear(); -} - -int Program::RegisterUniform(const char* name) -{ - int id = static_cast(m_uniform_locations.size()); - m_uniform_locations.push_back(glGetUniformLocation(m_program_id, name)); - return id; -} - -void Program::Uniform1ui(int index, u32 x) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform1ui(location, x); -} - -void Program::Uniform2ui(int index, u32 x, u32 y) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform2ui(location, x, y); -} - -void Program::Uniform3ui(int index, u32 x, u32 y, u32 z) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - 
const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform3ui(location, x, y, z); -} - -void Program::Uniform4ui(int index, u32 x, u32 y, u32 z, u32 w) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform4ui(location, x, y, z, w); -} - -void Program::Uniform1i(int index, s32 x) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform1i(location, x); -} - -void Program::Uniform2i(int index, s32 x, s32 y) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform2i(location, x, y); -} - -void Program::Uniform3i(int index, s32 x, s32 y, s32 z) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform3i(location, x, y, z); -} - -void Program::Uniform4i(int index, s32 x, s32 y, s32 z, s32 w) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform4i(location, x, y, z, w); -} - -void Program::Uniform1f(int index, float x) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform1f(location, x); -} - -void Program::Uniform2f(int index, float x, float y) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform2f(location, x, y); -} - -void Program::Uniform3f(int index, float x, float y, float z) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform3f(location, x, y, z); -} - -void 
Program::Uniform4f(int index, float x, float y, float z, float w) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform4f(location, x, y, z, w); -} - -void Program::Uniform2uiv(int index, const u32* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform2uiv(location, 1, v); -} - -void Program::Uniform3uiv(int index, const u32* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform3uiv(location, 1, v); -} - -void Program::Uniform4uiv(int index, const u32* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform4uiv(location, 1, v); -} - -void Program::Uniform2iv(int index, const s32* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform2iv(location, 1, v); -} - -void Program::Uniform3iv(int index, const s32* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform3iv(location, 1, v); -} - -void Program::Uniform4iv(int index, const s32* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform4iv(location, 1, v); -} - -void Program::Uniform2fv(int index, const float* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform2fv(location, 1, v); -} - -void Program::Uniform3fv(int index, const float* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const 
GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform3fv(location, 1, v); -} - -void Program::Uniform4fv(int index, const float* v) const -{ - Assert(static_cast(index) < m_uniform_locations.size()); - const GLint location = m_uniform_locations[index]; - if (location >= 0) - glUniform4fv(location, 1, v); -} - -void Program::Uniform1ui(const char* name, u32 x) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform1ui(location, x); -} - -void Program::Uniform2ui(const char* name, u32 x, u32 y) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform2ui(location, x, y); -} - -void Program::Uniform3ui(const char* name, u32 x, u32 y, u32 z) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform3ui(location, x, y, z); -} - -void Program::Uniform4ui(const char* name, u32 x, u32 y, u32 z, u32 w) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform4ui(location, x, y, z, w); -} - -void Program::Uniform1i(const char* name, s32 x) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform1i(location, x); -} - -void Program::Uniform2i(const char* name, s32 x, s32 y) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform2i(location, x, y); -} - -void Program::Uniform3i(const char* name, s32 x, s32 y, s32 z) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform3i(location, x, y, z); -} - -void Program::Uniform4i(const char* name, s32 x, s32 y, s32 z, s32 w) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform4i(location, x, y, z, w); -} - -void Program::Uniform1f(const char* name, float x) const -{ - const GLint location = 
glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform1f(location, x); -} - -void Program::Uniform2f(const char* name, float x, float y) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform2f(location, x, y); -} - -void Program::Uniform3f(const char* name, float x, float y, float z) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform3f(location, x, y, z); -} - -void Program::Uniform4f(const char* name, float x, float y, float z, float w) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform4f(location, x, y, z, w); -} - -void Program::Uniform2uiv(const char* name, const u32* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform2uiv(location, 1, v); -} - -void Program::Uniform3uiv(const char* name, const u32* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform3uiv(location, 1, v); -} - -void Program::Uniform4uiv(const char* name, const u32* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform4uiv(location, 1, v); -} - -void Program::Uniform2iv(const char* name, const s32* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform2iv(location, 1, v); -} - -void Program::Uniform3iv(const char* name, const s32* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform3iv(location, 1, v); -} - -void Program::Uniform4iv(const char* name, const s32* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform4iv(location, 1, v); -} - -void Program::Uniform2fv(const char* name, const float* v) const -{ - const GLint location = 
glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform2fv(location, 1, v); -} - -void Program::Uniform3fv(const char* name, const float* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform3fv(location, 1, v); -} - -void Program::Uniform4fv(const char* name, const float* v) const -{ - const GLint location = glGetUniformLocation(m_program_id, name); - if (location >= 0) - glUniform4fv(location, 1, v); -} - -void Program::BindUniformBlock(const char* name, u32 index) -{ - const GLint location = glGetUniformBlockIndex(m_program_id, name); - if (location >= 0) - glUniformBlockBinding(m_program_id, location, index); -} - -Program& Program::operator=(Program&& prog) -{ - Destroy(); - m_program_id = prog.m_program_id; - prog.m_program_id = 0; - m_vertex_shader_id = prog.m_vertex_shader_id; - prog.m_vertex_shader_id = 0; - m_fragment_shader_id = prog.m_fragment_shader_id; - prog.m_fragment_shader_id = 0; - m_uniform_locations = std::move(prog.m_uniform_locations); - return *this; -} - -} // namespace GL diff --git a/src/common/gl/program.h b/src/common/gl/program.h deleted file mode 100644 index 8fe43b507..000000000 --- a/src/common/gl/program.h +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../types.h" -#include "loader.h" -#include -#include - -namespace GL { -class Program -{ -public: - Program(); - Program(const Program&) = delete; - Program(Program&& prog); - ~Program(); - - static GLuint CompileShader(GLenum type, const std::string_view source); - static void ResetLastProgram(); - - bool IsVaild() const { return m_program_id != 0; } - bool IsBound() const { return s_last_program_id == m_program_id; } - - bool Compile(const std::string_view vertex_shader, const std::string_view fragment_shader); - - bool CreateFromBinary(const void* data, u32 data_length, u32 
data_format); - - bool GetBinary(std::vector* out_data, u32* out_data_format); - void SetBinaryRetrievableHint(); - - void BindAttribute(GLuint index, const char* name); - void BindDefaultAttributes(); - - void BindFragData(GLuint index = 0, const char* name = "o_col0"); - void BindFragDataIndexed(GLuint color_number = 0, const char* name = "o_col0"); - - bool Link(); - - void Bind() const; - - void Destroy(); - - int RegisterUniform(const char* name); - void Uniform1ui(int index, u32 x) const; - void Uniform2ui(int index, u32 x, u32 y) const; - void Uniform3ui(int index, u32 x, u32 y, u32 z) const; - void Uniform4ui(int index, u32 x, u32 y, u32 z, u32 w) const; - void Uniform1i(int index, s32 x) const; - void Uniform2i(int index, s32 x, s32 y) const; - void Uniform3i(int index, s32 x, s32 y, s32 z) const; - void Uniform4i(int index, s32 x, s32 y, s32 z, s32 w) const; - void Uniform1f(int index, float x) const; - void Uniform2f(int index, float x, float y) const; - void Uniform3f(int index, float x, float y, float z) const; - void Uniform4f(int index, float x, float y, float z, float w) const; - void Uniform2uiv(int index, const u32* v) const; - void Uniform3uiv(int index, const u32* v) const; - void Uniform4uiv(int index, const u32* v) const; - void Uniform2iv(int index, const s32* v) const; - void Uniform3iv(int index, const s32* v) const; - void Uniform4iv(int index, const s32* v) const; - void Uniform2fv(int index, const float* v) const; - void Uniform3fv(int index, const float* v) const; - void Uniform4fv(int index, const float* v) const; - - void Uniform1ui(const char* name, u32 x) const; - void Uniform2ui(const char* name, u32 x, u32 y) const; - void Uniform3ui(const char* name, u32 x, u32 y, u32 z) const; - void Uniform4ui(const char* name, u32 x, u32 y, u32 z, u32 w) const; - void Uniform1i(const char* name, s32 x) const; - void Uniform2i(const char* name, s32 x, s32 y) const; - void Uniform3i(const char* name, s32 x, s32 y, s32 z) const; - void 
Uniform4i(const char* name, s32 x, s32 y, s32 z, s32 w) const; - void Uniform1f(const char* name, float x) const; - void Uniform2f(const char* name, float x, float y) const; - void Uniform3f(const char* name, float x, float y, float z) const; - void Uniform4f(const char* name, float x, float y, float z, float w) const; - void Uniform2uiv(const char* name, const u32* v) const; - void Uniform3uiv(const char* name, const u32* v) const; - void Uniform4uiv(const char* name, const u32* v) const; - void Uniform2iv(const char* name, const s32* v) const; - void Uniform3iv(const char* name, const s32* v) const; - void Uniform4iv(const char* name, const s32* v) const; - void Uniform2fv(const char* name, const float* v) const; - void Uniform3fv(const char* name, const float* v) const; - void Uniform4fv(const char* name, const float* v) const; - - void BindUniformBlock(const char* name, u32 index); - - Program& operator=(const Program&) = delete; - Program& operator=(Program&& prog); - -private: - static u32 s_last_program_id; - - GLuint m_program_id = 0; - GLuint m_vertex_shader_id = 0; - GLuint m_fragment_shader_id = 0; - - std::vector m_uniform_locations; -}; - -} // namespace GL \ No newline at end of file diff --git a/src/common/gl/shader_cache.cpp b/src/common/gl/shader_cache.cpp deleted file mode 100644 index 286a73cab..000000000 --- a/src/common/gl/shader_cache.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "shader_cache.h" -#include "../file_system.h" -#include "../log.h" -#include "../md5_digest.h" -#include "../path.h" -#include "../string_util.h" -Log_SetChannel(GL::ShaderCache); - -#pragma pack(push, 1) -struct CacheIndexEntry -{ - u64 vertex_source_hash_low; - u64 vertex_source_hash_high; - u32 vertex_source_length; - u64 geometry_source_hash_low; - u64 geometry_source_hash_high; - u32 geometry_source_length; - u64 fragment_source_hash_low; - u64 
fragment_source_hash_high; - u32 fragment_source_length; - u32 file_offset; - u32 blob_size; - u32 blob_format; -}; -#pragma pack(pop) - -GL::ShaderCache::ShaderCache() = default; - -GL::ShaderCache::~ShaderCache() -{ - Close(); -} - -bool GL::ShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const -{ - return ( - vertex_source_hash_low == key.vertex_source_hash_low && vertex_source_hash_high == key.vertex_source_hash_high && - vertex_source_length == key.vertex_source_length && fragment_source_hash_low == key.fragment_source_hash_low && - fragment_source_hash_high == key.fragment_source_hash_high && fragment_source_length == key.fragment_source_length); -} - -bool GL::ShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const -{ - return ( - vertex_source_hash_low != key.vertex_source_hash_low || vertex_source_hash_high != key.vertex_source_hash_high || - vertex_source_length != key.vertex_source_length || fragment_source_hash_low != key.fragment_source_hash_low || - fragment_source_hash_high != key.fragment_source_hash_high || fragment_source_length != key.fragment_source_length); -} - -void GL::ShaderCache::Open(bool is_gles, std::string_view base_path, u32 version) -{ - m_base_path = base_path; - m_version = version; - m_program_binary_supported = is_gles || GLAD_GL_ARB_get_program_binary; - if (m_program_binary_supported) - { - // check that there's at least one format and the extension isn't being "faked" - GLint num_formats = 0; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - Log_InfoPrintf("%u program binary formats supported by driver", num_formats); - m_program_binary_supported = (num_formats > 0); - } - - if (!m_program_binary_supported) - { - Log_WarningPrintf("Your GL driver does not support program binaries. 
Hopefully it has a built-in cache, otherwise " - "startup will be slow due to compiling shaders."); - return; - } - - if (!base_path.empty()) - { - const std::string index_filename = GetIndexFileName(); - const std::string blob_filename = GetBlobFileName(); - - if (!ReadExisting(index_filename, blob_filename)) - CreateNew(index_filename, blob_filename); - } -} - -bool GL::ShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename) -{ - if (FileSystem::FileExists(index_filename.c_str())) - { - Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); - FileSystem::DeleteFile(index_filename.c_str()); - } - if (FileSystem::FileExists(blob_filename.c_str())) - { - Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); - FileSystem::DeleteFile(blob_filename.c_str()); - } - - m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); - if (!m_index_file) - { - Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str()); - return false; - } - - const u32 index_version = FILE_VERSION; - if (std::fwrite(&index_version, sizeof(index_version), 1, m_index_file) != 1 || - std::fwrite(&m_version, sizeof(m_version), 1, m_index_file) != 1) - { - Log_ErrorPrintf("Failed to write version to index file '%s'", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b"); - if (!m_blob_file) - { - Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - return true; -} - -bool GL::ShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename) -{ - m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b"); - if 
(!m_index_file) - return false; - - u32 file_version = 0; - u32 data_version = 0; - if (std::fread(&file_version, sizeof(file_version), 1, m_index_file) != 1 || file_version != FILE_VERSION || - std::fread(&data_version, sizeof(data_version), 1, m_index_file) != 1 || data_version != m_version) - { - Log_ErrorPrintf("Bad file/data version in '%s'", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b"); - if (!m_blob_file) - { - Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - std::fseek(m_blob_file, 0, SEEK_END); - const u32 blob_file_size = static_cast(std::ftell(m_blob_file)); - - for (;;) - { - CacheIndexEntry entry; - if (std::fread(&entry, sizeof(entry), 1, m_index_file) != 1 || - (entry.file_offset + entry.blob_size) > blob_file_size) - { - if (std::feof(m_index_file)) - break; - - Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str()); - m_index.clear(); - std::fclose(m_blob_file); - m_blob_file = nullptr; - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - const CacheIndexKey key{entry.vertex_source_hash_low, entry.vertex_source_hash_high, - entry.vertex_source_length, entry.fragment_source_hash_low, - entry.fragment_source_hash_high, entry.fragment_source_length}; - const CacheIndexData data{entry.file_offset, entry.blob_size, entry.blob_format}; - m_index.emplace(key, data); - } - - Log_InfoPrintf("Read %zu entries from '%s'", m_index.size(), index_filename.c_str()); - return true; -} - -void GL::ShaderCache::Close() -{ - m_index.clear(); - if (m_index_file) - std::fclose(m_index_file); - if (m_blob_file) - std::fclose(m_blob_file); -} - -bool GL::ShaderCache::Recreate() -{ - Close(); - - const std::string index_filename = GetIndexFileName(); - const std::string blob_filename = 
GetBlobFileName(); - - return CreateNew(index_filename, blob_filename); -} - -GL::ShaderCache::CacheIndexKey GL::ShaderCache::GetCacheKey(const std::string_view& vertex_shader, - const std::string_view& fragment_shader) -{ - union ShaderHash - { - struct - { - u64 low; - u64 high; - }; - u8 bytes[16]; - }; - - ShaderHash vertex_hash = {}; - ShaderHash fragment_hash = {}; - - MD5Digest digest; - if (!vertex_shader.empty()) - { - digest.Update(vertex_shader.data(), static_cast(vertex_shader.length())); - digest.Final(vertex_hash.bytes); - } - - if (!fragment_shader.empty()) - { - digest.Reset(); - digest.Update(fragment_shader.data(), static_cast(fragment_shader.length())); - digest.Final(fragment_hash.bytes); - } - - return CacheIndexKey{vertex_hash.low, vertex_hash.high, static_cast(vertex_shader.length()), - fragment_hash.low, fragment_hash.high, static_cast(fragment_shader.length())}; -} - -std::string GL::ShaderCache::GetIndexFileName() const -{ - return Path::Combine(m_base_path, "gl_programs.idx"); -} - -std::string GL::ShaderCache::GetBlobFileName() const -{ - return Path::Combine(m_base_path, "gl_programs.bin"); -} - -std::optional GL::ShaderCache::GetProgram(const std::string_view vertex_shader, - const std::string_view fragment_shader, - const PreLinkCallback& callback) -{ - if (!m_program_binary_supported || !m_blob_file) - return CompileProgram(vertex_shader, fragment_shader, callback, false); - - const auto key = GetCacheKey(vertex_shader, fragment_shader); - auto iter = m_index.find(key); - if (iter == m_index.end()) - return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback); - - std::vector data(iter->second.blob_size); - if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 || - std::fread(data.data(), 1, iter->second.blob_size, m_blob_file) != iter->second.blob_size) - { - Log_ErrorPrintf("Read blob from file failed"); - return {}; - } - - Program prog; - if (prog.CreateFromBinary(data.data(), 
static_cast(data.size()), iter->second.blob_format)) - return std::optional(std::move(prog)); - - Log_WarningPrintf( - "Failed to create program from binary, this may be due to a driver or GPU Change. Recreating cache."); - if (!Recreate()) - return CompileProgram(vertex_shader, fragment_shader, callback, false); - else - return CompileAndAddProgram(key, vertex_shader, fragment_shader, callback); -} - -std::optional GL::ShaderCache::CompileProgram(const std::string_view& vertex_shader, - const std::string_view& fragment_shader, - const PreLinkCallback& callback, bool set_retrievable) -{ - Program prog; - if (!prog.Compile(vertex_shader, fragment_shader)) - return std::nullopt; - - if (callback) - callback(prog); - - if (set_retrievable) - prog.SetBinaryRetrievableHint(); - - if (!prog.Link()) - return std::nullopt; - - return std::optional(std::move(prog)); -} - -std::optional GL::ShaderCache::CompileAndAddProgram(const CacheIndexKey& key, - const std::string_view& vertex_shader, - const std::string_view& fragment_shader, - const PreLinkCallback& callback) -{ - std::optional prog = CompileProgram(vertex_shader, fragment_shader, callback, true); - if (!prog) - return std::nullopt; - - std::vector prog_data; - u32 prog_format = 0; - if (!prog->GetBinary(&prog_data, &prog_format)) - return std::nullopt; - - if (!m_blob_file || std::fseek(m_blob_file, 0, SEEK_END) != 0) - return prog; - - CacheIndexData data; - data.file_offset = static_cast(std::ftell(m_blob_file)); - data.blob_size = static_cast(prog_data.size()); - data.blob_format = prog_format; - - CacheIndexEntry entry = {}; - entry.vertex_source_hash_low = key.vertex_source_hash_low; - entry.vertex_source_hash_high = key.vertex_source_hash_high; - entry.vertex_source_length = key.vertex_source_length; - entry.fragment_source_hash_low = key.fragment_source_hash_low; - entry.fragment_source_hash_high = key.fragment_source_hash_high; - entry.fragment_source_length = key.fragment_source_length; - entry.file_offset = 
data.file_offset; - entry.blob_size = data.blob_size; - entry.blob_format = data.blob_format; - - if (std::fwrite(prog_data.data(), 1, entry.blob_size, m_blob_file) != entry.blob_size || - std::fflush(m_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_index_file) != 1 || - std::fflush(m_index_file) != 0) - { - Log_ErrorPrintf("Failed to write shader blob to file"); - return prog; - } - - m_index.emplace(key, data); - return prog; -} diff --git a/src/common/gl/shader_cache.h b/src/common/gl/shader_cache.h deleted file mode 100644 index a65278831..000000000 --- a/src/common/gl/shader_cache.h +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../hash_combine.h" -#include "../types.h" -#include "program.h" -#include -#include -#include -#include -#include -#include -#include - -namespace GL { - -class ShaderCache -{ -public: - using PreLinkCallback = std::function; - - ShaderCache(); - ~ShaderCache(); - - void Open(bool is_gles, std::string_view base_path, u32 version); - - std::optional GetProgram(const std::string_view vertex_shader, const std::string_view fragment_shader, - const PreLinkCallback& callback = {}); - -private: - static constexpr u32 FILE_VERSION = 4; - - struct CacheIndexKey - { - u64 vertex_source_hash_low; - u64 vertex_source_hash_high; - u32 vertex_source_length; - u64 fragment_source_hash_low; - u64 fragment_source_hash_high; - u32 fragment_source_length; - - bool operator==(const CacheIndexKey& key) const; - bool operator!=(const CacheIndexKey& key) const; - }; - - struct CacheIndexEntryHasher - { - std::size_t operator()(const CacheIndexKey& e) const noexcept - { - std::size_t h = 0; - hash_combine(h, e.vertex_source_hash_low, e.vertex_source_hash_high, e.vertex_source_length, - e.fragment_source_hash_low, e.fragment_source_hash_high, e.fragment_source_length); - return h; - } - }; - - struct CacheIndexData - { - u32 
file_offset; - u32 blob_size; - u32 blob_format; - }; - - using CacheIndex = std::unordered_map; - - static CacheIndexKey GetCacheKey(const std::string_view& vertex_shader, const std::string_view& fragment_shader); - - std::string GetIndexFileName() const; - std::string GetBlobFileName() const; - - bool CreateNew(const std::string& index_filename, const std::string& blob_filename); - bool ReadExisting(const std::string& index_filename, const std::string& blob_filename); - void Close(); - bool Recreate(); - - std::optional CompileProgram(const std::string_view& vertex_shader, const std::string_view& fragment_shader, - const PreLinkCallback& callback, bool set_retrievable); - std::optional CompileAndAddProgram(const CacheIndexKey& key, const std::string_view& vertex_shader, - const std::string_view& fragment_shader, const PreLinkCallback& callback); - - std::string m_base_path; - std::FILE* m_index_file = nullptr; - std::FILE* m_blob_file = nullptr; - - CacheIndex m_index; - u32 m_version = 0; - bool m_program_binary_supported = false; -}; - -} // namespace GL diff --git a/src/common/gl/texture.cpp b/src/common/gl/texture.cpp deleted file mode 100644 index 08bcce89a..000000000 --- a/src/common/gl/texture.cpp +++ /dev/null @@ -1,379 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "texture.h" -#include "../assert.h" -#include "../log.h" -#include -#include -#include -Log_SetChannel(GL); - -const std::tuple& GL::Texture::GetPixelFormatMapping(GPUTexture::Format format) -{ - static constexpr std::array, static_cast(GPUTexture::Format::Count)> mapping = - {{ - {}, // Unknown - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, 
GL_SHORT}, // D16 - }}; - - static constexpr std::array, static_cast(GPUTexture::Format::Count)> - mapping_gles2 = {{ - {}, // Unknown - {GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {}, // BGRA8 - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {}, // RGBA5551 - {}, // R8 - {}, // D16 - }}; - - if (!GLAD_GL_ES_VERSION_2_0 || GLAD_GL_ES_VERSION_3_0) - return mapping[static_cast(format)]; - else - return mapping_gles2[static_cast(format)]; -} - -GL::Texture::Texture() = default; - -GL::Texture::Texture(Texture&& moved) : m_id(moved.m_id), m_fbo_id(moved.m_fbo_id) -{ - m_width = moved.m_width; - m_height = moved.m_height; - m_levels = moved.m_levels; - m_layers = moved.m_layers; - m_samples = moved.m_samples; - m_format = moved.m_format; - moved.m_id = 0; - moved.m_fbo_id = 0; - moved.ClearBaseProperties(); -} - -GL::Texture::~Texture() -{ - Destroy(); -} - -bool GL::Texture::UseTextureStorage(bool multisampled) -{ - return GLAD_GL_ARB_texture_storage || (multisampled ? GLAD_GL_ES_VERSION_3_1 : GLAD_GL_ES_VERSION_3_0); -} - -bool GL::Texture::UseTextureStorage() const -{ - return UseTextureStorage(IsMultisampled()); -} - -bool GL::Texture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Format format, - const void* data /* = nullptr */, u32 data_pitch /* = 0 */, bool linear /* = true */, - bool wrap /* = true */) -{ - glGetError(); - - if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES) - { - Log_ErrorPrintf("Invalid dimensions: %ux%ux%u %u %u", width, height, layers, levels, samples); - return false; - } - - if (samples > 1 && levels > 1) - { - Log_ErrorPrintf("Multisampled textures can't have mip levels"); - return false; - } - - if (layers > 1 && data) - { - Log_ErrorPrintf("Loading texture array data not currently supported"); - return false; - } - - const GLenum target = ((samples > 1) ? ((layers > 1) ? 
GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D_MULTISAMPLE_ARRAY) : - ((layers > 1) ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D)); - const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(format); - - GLuint id; - glGenTextures(1, &id); - glBindTexture(target, id); - - if (samples > 1) - { - Assert(!data); - if (UseTextureStorage(true)) - { - if (layers > 1) - glTexStorage3DMultisample(target, samples, gl_internal_format, width, height, layers, GL_FALSE); - else - glTexStorage2DMultisample(target, samples, gl_internal_format, width, height, GL_FALSE); - } - else - { - if (layers > 1) - glTexImage3DMultisample(target, samples, gl_internal_format, width, height, layers, GL_FALSE); - else - glTexImage2DMultisample(target, samples, gl_internal_format, width, height, GL_FALSE); - } - - glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels); - } - else - { - if (UseTextureStorage(false)) - { - if (layers > 1) - glTexStorage3D(target, levels, gl_internal_format, width, height, layers); - else - glTexStorage2D(target, levels, gl_internal_format, width, height); - - if (data) - { - // TODO: Fix data for mipmaps here. - if (layers > 1) - glTexSubImage3D(target, 0, 0, 0, 0, width, height, layers, gl_format, gl_type, data); - else - glTexSubImage2D(target, 0, 0, 0, width, height, gl_format, gl_type, data); - } - } - else - { - for (u32 i = 0; i < levels; i++) - { - // TODO: Fix data pointer here. - if (layers > 1) - glTexImage3D(target, i, gl_internal_format, width, height, layers, 0, gl_format, gl_type, data); - else - glTexImage2D(target, i, gl_internal_format, width, height, 0, gl_format, gl_type, data); - } - - // This doesn't exist on GLES2. - if (!GLAD_GL_ES_VERSION_2_0 || GLAD_GL_ES_VERSION_3_0) - { - glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels); - } - } - - glTexParameteri(target, GL_TEXTURE_MIN_FILTER, linear ? 
GL_LINEAR : GL_NEAREST); - glTexParameteri(target, GL_TEXTURE_MAG_FILTER, linear ? GL_LINEAR : GL_NEAREST); - glTexParameteri(target, GL_TEXTURE_WRAP_S, wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE); - glTexParameteri(target, GL_TEXTURE_WRAP_T, wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE); - - if (layers > 1) - glTexParameteri(target, GL_TEXTURE_WRAP_R, wrap ? GL_REPEAT : GL_CLAMP_TO_EDGE); - } - - GLenum error = glGetError(); - if (error != GL_NO_ERROR) - { - Log_ErrorPrintf("Failed to create texture: 0x%X", error); - glDeleteTextures(1, &id); - return false; - } - - if (IsValid()) - Destroy(); - - m_id = id; - m_width = static_cast(width); - m_height = static_cast(height); - m_layers = static_cast(layers); - m_levels = static_cast(levels); - m_samples = static_cast(samples); - m_format = format; - return true; -} - -void GL::Texture::Replace(u32 width, u32 height, GLenum internal_format, GLenum format, GLenum type, const void* data) -{ - Assert(IsValid() && width < MAX_WIDTH && height < MAX_HEIGHT && m_layers == 1 && m_samples == 1 && m_levels == 1); - - const bool size_changed = (width != m_width || height != m_height); - - m_width = static_cast(width); - m_height = static_cast(height); - m_levels = 1; - - const GLenum target = GetGLTarget(); - glBindTexture(target, m_id); - - if (UseTextureStorage()) - { - if (size_changed) - { - if (m_layers > 0) - glTexStorage3D(target, m_levels, internal_format, m_width, m_height, m_levels); - else - glTexStorage2D(target, m_levels, internal_format, m_width, m_height); - } - - glTexSubImage2D(target, 0, 0, 0, m_width, m_height, format, type, data); - } - else - { - glTexImage2D(target, 0, internal_format, width, height, 0, format, type, data); - } -} - -void GL::Texture::ReplaceImage(u32 layer, u32 level, GLenum format, GLenum type, const void* data) -{ - Assert(IsValid() && !IsMultisampled()); - - const GLenum target = GetGLTarget(); - if (IsTextureArray()) - glTexSubImage3D(target, level, 0, 0, layer, m_width, m_height, 1, format, type, 
data); - else - glTexSubImage2D(target, level, 0, 0, m_width, m_height, format, type, data); -} - -void GL::Texture::ReplaceSubImage(u32 layer, u32 level, u32 x, u32 y, u32 width, u32 height, GLenum format, GLenum type, - const void* data) -{ - Assert(IsValid() && !IsMultisampled()); - - const GLenum target = GetGLTarget(); - if (IsTextureArray()) - glTexSubImage3D(target, level, x, y, layer, width, height, 1, format, type, data); - else - glTexSubImage2D(target, level, x, y, width, height, format, type, data); -} - -void GL::Texture::SetLinearFilter(bool enabled) const -{ - Assert(!IsMultisampled()); - - Bind(); - - const GLenum target = GetGLTarget(); - glTexParameteri(target, GL_TEXTURE_MIN_FILTER, enabled ? GL_LINEAR : GL_NEAREST); - glTexParameteri(target, GL_TEXTURE_MAG_FILTER, enabled ? GL_LINEAR : GL_NEAREST); -} - -void GL::Texture::SetWrap(bool enabled) const -{ - const GLenum target = GetGLTarget(); - glTexParameteri(target, GL_TEXTURE_WRAP_S, enabled ? GL_REPEAT : GL_CLAMP_TO_EDGE); - glTexParameteri(target, GL_TEXTURE_WRAP_T, enabled ? GL_REPEAT : GL_CLAMP_TO_EDGE); - - if (m_layers > 1) - glTexParameteri(target, GL_TEXTURE_WRAP_R, enabled ? 
GL_REPEAT : GL_CLAMP_TO_EDGE); -} - -bool GL::Texture::CreateFramebuffer() -{ - if (!IsValid()) - return false; - - glGetError(); - - GLuint fbo_id; - glGenFramebuffers(1, &fbo_id); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_id); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_id, 0); - if (glGetError() != GL_NO_ERROR || glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) - { - glDeleteFramebuffers(1, &fbo_id); - return false; - } - - if (m_fbo_id != 0) - glDeleteFramebuffers(1, &m_fbo_id); - - m_fbo_id = fbo_id; - return true; -} - -void GL::Texture::Destroy() -{ - if (m_fbo_id != 0) - { - glDeleteFramebuffers(1, &m_fbo_id); - m_fbo_id = 0; - } - if (m_id != 0) - { - glDeleteTextures(1, &m_id); - m_id = 0; - } - - ClearBaseProperties(); -} - -void GL::Texture::Bind() const -{ - glBindTexture(GetGLTarget(), m_id); -} - -void GL::Texture::BindFramebuffer(GLenum target /*= GL_DRAW_FRAMEBUFFER*/) const -{ - DebugAssert(m_fbo_id != 0); - glBindFramebuffer(target, m_fbo_id); -} - -void GL::Texture::Unbind() const -{ - glBindTexture(GetGLTarget(), 0); -} - -GL::Texture& GL::Texture::operator=(Texture&& moved) -{ - Destroy(); - - m_id = moved.m_id; - m_fbo_id = moved.m_fbo_id; - m_width = moved.m_width; - m_height = moved.m_height; - m_layers = moved.m_layers; - m_levels = moved.m_levels; - m_samples = moved.m_samples; - - moved.m_id = 0; - moved.m_fbo_id = 0; - moved.ClearBaseProperties(); - return *this; -} - -void GL::Texture::GetTextureSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, - GLsizei bufSize, void* pixels) -{ - if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image) - { - glGetTextureSubImage(texture, level, xoffset, yoffset, zoffset, width, height, depth, format, type, bufSize, - pixels); - return; - } - - GLenum target = GL_READ_FRAMEBUFFER; - GLenum target_binding = 
GL_READ_FRAMEBUFFER_BINDING; - if (GLAD_GL_ES_VERSION_2_0 && !GLAD_GL_ES_VERSION_3_0) - { - // GLES2 doesn't have GL_READ_FRAMEBUFFER. - target = GL_FRAMEBUFFER; - target_binding = GL_FRAMEBUFFER_BINDING; - } - - Assert(depth == 1); - - GLuint old_read_fbo; - glGetIntegerv(target_binding, reinterpret_cast(&old_read_fbo)); - - GLuint temp_fbo; - glGenFramebuffers(1, &temp_fbo); - glBindFramebuffer(target, temp_fbo); - if (zoffset > 0 && (GLAD_GL_VERSION_3_0 || GLAD_GL_ES_VERSION_3_0)) - glFramebufferTextureLayer(target, GL_COLOR_ATTACHMENT0, texture, level, zoffset); - else - glFramebufferTexture2D(target, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, level); - - DebugAssert(glCheckFramebufferStatus(target) == GL_FRAMEBUFFER_COMPLETE); - glReadPixels(xoffset, yoffset, width, height, format, type, pixels); - - glBindFramebuffer(target, old_read_fbo); - glDeleteFramebuffers(1, &temp_fbo); -} diff --git a/src/common/gl/texture.h b/src/common/gl/texture.h deleted file mode 100644 index 5aa71be2d..000000000 --- a/src/common/gl/texture.h +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../gpu_texture.h" -#include "loader.h" -#include - -namespace GL { - -class Texture final : public GPUTexture -{ -public: - Texture(); - Texture(Texture&& moved); - ~Texture(); - - static bool UseTextureStorage(bool multisampled); - static const std::tuple& GetPixelFormatMapping(Format format); - - ALWAYS_INLINE GLuint GetGLId() const { return m_id; } - bool IsValid() const override { return m_id != 0; } - - bool Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Format format, const void* data = nullptr, - u32 data_pitch = 0, bool linear = true, bool wrap = true); - void Destroy(); - - void Replace(u32 width, u32 height, GLenum internal_format, GLenum format, GLenum type, const void* data); - void ReplaceImage(u32 layer, u32 level, GLenum format, GLenum 
type, const void* data); - void ReplaceSubImage(u32 layer, u32 level, u32 x, u32 y, u32 width, u32 height, GLenum format, GLenum type, - const void* data); - bool CreateFramebuffer(); - - bool UseTextureStorage() const; - - void SetLinearFilter(bool enabled) const; - void SetWrap(bool enabled) const; - - ALWAYS_INLINE GLuint GetGLFramebufferID() const { return m_fbo_id; } - ALWAYS_INLINE GLenum GetGLTarget() const - { - return (IsMultisampled() ? (IsTextureArray() ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D_MULTISAMPLE_ARRAY) : - (IsTextureArray() ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D)); - } - - void Bind() const; - void BindFramebuffer(GLenum target = GL_DRAW_FRAMEBUFFER) const; - void Unbind() const; - - Texture& operator=(const Texture& copy) = delete; - Texture& operator=(Texture&& moved); - - // Helper which uses glGetTextureSubImage where available, otherwise a temporary FBO. - static void GetTextureSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, - GLsizei bufSize, void* pixels); - -private: - GLuint m_id = 0; - GLuint m_fbo_id = 0; -}; - -} // namespace GL \ No newline at end of file diff --git a/src/common/gl/x11_window.cpp b/src/common/gl/x11_window.cpp deleted file mode 100644 index 016c80cf2..000000000 --- a/src/common/gl/x11_window.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "x11_window.h" -#include "../assert.h" -#include "../log.h" -#include -Log_SetChannel(X11Window); - -namespace GL { -X11Window::X11Window() = default; - -X11Window::~X11Window() -{ - Destroy(); -} - -bool X11Window::Create(Display* display, Window parent_window, const XVisualInfo* vi) -{ - m_display = display; - m_parent_window = parent_window; - XSync(m_display, True); - - XWindowAttributes parent_wa = {}; - XGetWindowAttributes(m_display, m_parent_window, 
&parent_wa); - m_width = static_cast(parent_wa.width); - m_height = static_cast(parent_wa.height); - - // Failed X calls terminate the process so no need to check for errors. - // We could swap the error handler out here as well. - m_colormap = XCreateColormap(m_display, m_parent_window, vi->visual, AllocNone); - - XSetWindowAttributes wa = {}; - wa.colormap = m_colormap; - - m_window = XCreateWindow(m_display, m_parent_window, 0, 0, m_width, m_height, 0, vi->depth, InputOutput, vi->visual, - CWColormap, &wa); - XMapWindow(m_display, m_window); - XSync(m_display, True); - - return true; -} - -void X11Window::Destroy() -{ - if (m_window) - { - XUnmapWindow(m_display, m_window); - XDestroyWindow(m_display, m_window); - m_window = {}; - } - - if (m_colormap) - { - XFreeColormap(m_display, m_colormap); - m_colormap = {}; - } -} - -void X11Window::Resize(u32 width, u32 height) -{ - if (width != 0 && height != 0) - { - m_width = width; - m_height = height; - } - else - { - XWindowAttributes parent_wa = {}; - XGetWindowAttributes(m_display, m_parent_window, &parent_wa); - m_width = static_cast(parent_wa.width); - m_height = static_cast(parent_wa.height); - } - - XResizeWindow(m_display, m_window, m_width, m_height); -} - -static X11InhibitErrors* s_current_error_inhibiter; - -X11InhibitErrors::X11InhibitErrors() -{ - Assert(!s_current_error_inhibiter); - m_old_handler = XSetErrorHandler(ErrorHandler); - s_current_error_inhibiter = this; -} - -X11InhibitErrors::~X11InhibitErrors() -{ - Assert(s_current_error_inhibiter == this); - s_current_error_inhibiter = nullptr; - XSetErrorHandler(m_old_handler); -} - -int X11InhibitErrors::ErrorHandler(Display* display, XErrorEvent* ee) -{ - char error_string[256] = {}; - XGetErrorText(display, ee->error_code, error_string, sizeof(error_string)); - Log_WarningPrintf("X11 Error: %s (Error %u Minor %u Request %u)", error_string, ee->error_code, ee->minor_code, - ee->request_code); - - s_current_error_inhibiter->m_had_error = true; - 
return 0; -} -} // namespace GL diff --git a/src/common/gl/x11_window.h b/src/common/gl/x11_window.h deleted file mode 100644 index 0aa9e9d6d..000000000 --- a/src/common/gl/x11_window.h +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../types.h" -#include -#include - -namespace GL { -class X11Window -{ -public: - X11Window(); - ~X11Window(); - - ALWAYS_INLINE Window GetWindow() const { return m_window; } - ALWAYS_INLINE u32 GetWidth() const { return m_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_height; } - - bool Create(Display* display, Window parent_window, const XVisualInfo* vi); - void Destroy(); - - // Setting a width/height of 0 will use parent dimensions. - void Resize(u32 width = 0, u32 height = 0); - -private: - Display* m_display = nullptr; - Window m_parent_window = {}; - Window m_window = {}; - Colormap m_colormap = {}; - u32 m_width = 0; - u32 m_height = 0; -}; - -// Helper class for managing X errors -class X11InhibitErrors -{ -public: - X11InhibitErrors(); - ~X11InhibitErrors(); - - ALWAYS_INLINE bool HadError() const { return m_had_error; } - -private: - static int ErrorHandler(Display* display, XErrorEvent* ee); - - XErrorHandler m_old_handler = {}; - bool m_had_error = false; -}; - -} // namespace GL diff --git a/src/common/gpu_texture.h b/src/common/gpu_texture.h deleted file mode 100644 index c63ab3301..000000000 --- a/src/common/gpu_texture.h +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "types.h" -#include -#include - -class GPUTexture -{ -public: - enum : u32 - { - MAX_WIDTH = 65535, - MAX_HEIGHT = 65535, - MAX_LAYERS = 255, - MAX_LEVELS = 255, - MAX_SAMPLES = 255, - }; - - enum class Format : u8 - { - Unknown, - RGBA8, - BGRA8, - RGB565, - RGBA5551, - R8, - D16, - Count - }; - 
-public: - virtual ~GPUTexture(); - - ALWAYS_INLINE u32 GetWidth() const { return m_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_height; } - ALWAYS_INLINE u32 GetLayers() const { return m_layers; } - ALWAYS_INLINE u32 GetLevels() const { return m_levels; } - ALWAYS_INLINE u32 GetSamples() const { return m_samples; } - ALWAYS_INLINE GPUTexture::Format GetFormat() const { return m_format; } - - ALWAYS_INLINE bool IsTextureArray() const { return m_layers > 1; } - ALWAYS_INLINE bool IsMultisampled() const { return m_samples > 1; } - - ALWAYS_INLINE u32 GetPixelSize() const { return GetPixelSize(m_format); } - ALWAYS_INLINE u32 GetMipWidth(u32 level) const { return std::max(m_width >> level, 1u); } - ALWAYS_INLINE u32 GetMipHeight(u32 level) const { return std::max(m_height >> level, 1u); } - - virtual bool IsValid() const = 0; - - static u32 GetPixelSize(GPUTexture::Format format); - static bool IsDepthFormat(GPUTexture::Format format); - - static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, u32& texture_data_stride, - GPUTexture::Format format); - static void FlipTextureDataRGBA8(u32 width, u32 height, std::vector& texture_data, u32 texture_data_stride); - -protected: - GPUTexture(); - GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Format format); - - void ClearBaseProperties(); - - u16 m_width = 0; - u16 m_height = 0; - u8 m_layers = 0; - u8 m_levels = 0; - u8 m_samples = 0; - Format m_format = Format::Unknown; -}; diff --git a/src/common/http_downloader_winhttp.cpp b/src/common/http_downloader_winhttp.cpp index b15992e49..f746daedf 100644 --- a/src/common/http_downloader_winhttp.cpp +++ b/src/common/http_downloader_winhttp.cpp @@ -10,8 +10,6 @@ #include Log_SetChannel(HTTPDownloaderWinHttp); -#pragma comment(lib, "winhttp.lib") - namespace Common { HTTPDownloaderWinHttp::HTTPDownloaderWinHttp() : HTTPDownloader() {} diff --git a/src/common/vulkan/builders.cpp b/src/common/vulkan/builders.cpp 
deleted file mode 100644 index 4efc5d001..000000000 --- a/src/common/vulkan/builders.cpp +++ /dev/null @@ -1,761 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "builders.h" -#include "../assert.h" -#include "util.h" - -namespace Vulkan { - -DescriptorSetLayoutBuilder::DescriptorSetLayoutBuilder() -{ - Clear(); -} - -void DescriptorSetLayoutBuilder::Clear() -{ - m_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - m_ci.pNext = nullptr; - m_ci.flags = 0; - m_ci.pBindings = nullptr; - m_ci.bindingCount = 0; -} - -VkDescriptorSetLayout DescriptorSetLayoutBuilder::Create(VkDevice device) -{ - VkDescriptorSetLayout layout; - VkResult res = vkCreateDescriptorSetLayout(device, &m_ci, nullptr, &layout); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDescriptorSetLayout() failed: "); - return VK_NULL_HANDLE; - } - - Clear(); - return layout; -} - -void DescriptorSetLayoutBuilder::AddBinding(u32 binding, VkDescriptorType dtype, u32 dcount, VkShaderStageFlags stages) -{ - Assert(m_ci.bindingCount < MAX_BINDINGS); - - VkDescriptorSetLayoutBinding& b = m_bindings[m_ci.bindingCount]; - b.binding = binding; - b.descriptorType = dtype; - b.descriptorCount = dcount; - b.stageFlags = stages; - b.pImmutableSamplers = nullptr; - - m_ci.pBindings = m_bindings.data(); - m_ci.bindingCount++; -} - -PipelineLayoutBuilder::PipelineLayoutBuilder() -{ - Clear(); -} - -void PipelineLayoutBuilder::Clear() -{ - m_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - m_ci.pNext = nullptr; - m_ci.flags = 0; - m_ci.pSetLayouts = nullptr; - m_ci.setLayoutCount = 0; - m_ci.pPushConstantRanges = nullptr; - m_ci.pushConstantRangeCount = 0; -} - -VkPipelineLayout PipelineLayoutBuilder::Create(VkDevice device) -{ - VkPipelineLayout layout; - VkResult res = vkCreatePipelineLayout(device, &m_ci, nullptr, &layout); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, 
"vkCreatePipelineLayout() failed: "); - return VK_NULL_HANDLE; - } - - Clear(); - return layout; -} - -void PipelineLayoutBuilder::AddDescriptorSet(VkDescriptorSetLayout layout) -{ - Assert(m_ci.setLayoutCount < MAX_SETS); - - m_sets[m_ci.setLayoutCount] = layout; - - m_ci.setLayoutCount++; - m_ci.pSetLayouts = m_sets.data(); -} - -void PipelineLayoutBuilder::AddPushConstants(VkShaderStageFlags stages, u32 offset, u32 size) -{ - Assert(m_ci.pushConstantRangeCount < MAX_PUSH_CONSTANTS); - - VkPushConstantRange& r = m_push_constants[m_ci.pushConstantRangeCount]; - r.stageFlags = stages; - r.offset = offset; - r.size = size; - - m_ci.pushConstantRangeCount++; - m_ci.pPushConstantRanges = m_push_constants.data(); -} - -GraphicsPipelineBuilder::GraphicsPipelineBuilder() -{ - Clear(); -} - -void GraphicsPipelineBuilder::Clear() -{ - m_ci = {}; - m_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - - m_shader_stages = {}; - - m_vertex_input_state = {}; - m_vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - m_ci.pVertexInputState = &m_vertex_input_state; - m_vertex_attributes = {}; - m_vertex_buffers = {}; - - m_input_assembly = {}; - m_input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - - m_rasterization_state = {}; - m_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - m_rasterization_state.lineWidth = 1.0f; - m_depth_state = {}; - m_depth_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - m_blend_state = {}; - m_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - m_blend_attachments = {}; - - m_viewport_state = {}; - m_viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - m_viewport = {}; - m_scissor = {}; - - m_dynamic_state = {}; - m_dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - m_dynamic_state_values = {}; - - m_multisample_state = {}; - 
m_multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - - // set defaults - SetNoCullRasterizationState(); - SetNoDepthTestState(); - SetNoBlendingState(); - SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - - // have to be specified even if dynamic - SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); - SetScissorRect(0, 0, 1, 1); - SetMultisamples(VK_SAMPLE_COUNT_1_BIT); -} - -VkPipeline GraphicsPipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear /* = true */) -{ - VkPipeline pipeline; - VkResult res = vkCreateGraphicsPipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines() failed: "); - return VK_NULL_HANDLE; - } - - if (clear) - Clear(); - - return pipeline; -} - -void GraphicsPipelineBuilder::SetShaderStage(VkShaderStageFlagBits stage, VkShaderModule module, - const char* entry_point) -{ - Assert(m_ci.stageCount < MAX_SHADER_STAGES); - - u32 index = 0; - for (; index < m_ci.stageCount; index++) - { - if (m_shader_stages[index].stage == stage) - break; - } - if (index == m_ci.stageCount) - { - m_ci.stageCount++; - m_ci.pStages = m_shader_stages.data(); - } - - VkPipelineShaderStageCreateInfo& s = m_shader_stages[index]; - s.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - s.stage = stage; - s.module = module; - s.pName = entry_point; -} - -void GraphicsPipelineBuilder::AddVertexBuffer(u32 binding, u32 stride, - VkVertexInputRate input_rate /*= VK_VERTEX_INPUT_RATE_VERTEX*/) -{ - Assert(m_vertex_input_state.vertexAttributeDescriptionCount < MAX_VERTEX_BUFFERS); - - VkVertexInputBindingDescription& b = m_vertex_buffers[m_vertex_input_state.vertexBindingDescriptionCount]; - b.binding = binding; - b.stride = stride; - b.inputRate = input_rate; - - m_vertex_input_state.vertexBindingDescriptionCount++; - m_vertex_input_state.pVertexBindingDescriptions = m_vertex_buffers.data(); - 
m_ci.pVertexInputState = &m_vertex_input_state; -} - -void GraphicsPipelineBuilder::AddVertexAttribute(u32 location, u32 binding, VkFormat format, u32 offset) -{ - Assert(m_vertex_input_state.vertexAttributeDescriptionCount < MAX_VERTEX_BUFFERS); - - VkVertexInputAttributeDescription& a = m_vertex_attributes[m_vertex_input_state.vertexAttributeDescriptionCount]; - a.location = location; - a.binding = binding; - a.format = format; - a.offset = offset; - - m_vertex_input_state.vertexAttributeDescriptionCount++; - m_vertex_input_state.pVertexAttributeDescriptions = m_vertex_attributes.data(); - m_ci.pVertexInputState = &m_vertex_input_state; -} - -void GraphicsPipelineBuilder::SetPrimitiveTopology(VkPrimitiveTopology topology, - bool enable_primitive_restart /*= false*/) -{ - m_input_assembly.topology = topology; - m_input_assembly.primitiveRestartEnable = enable_primitive_restart; - - m_ci.pInputAssemblyState = &m_input_assembly; -} - -void GraphicsPipelineBuilder::SetRasterizationState(VkPolygonMode polygon_mode, VkCullModeFlags cull_mode, - VkFrontFace front_face) -{ - m_rasterization_state.polygonMode = polygon_mode; - m_rasterization_state.cullMode = cull_mode; - m_rasterization_state.frontFace = front_face; - - m_ci.pRasterizationState = &m_rasterization_state; -} - -void GraphicsPipelineBuilder::SetLineWidth(float width) -{ - m_rasterization_state.lineWidth = width; -} - -void GraphicsPipelineBuilder::SetMultisamples(u32 multisamples, bool per_sample_shading) -{ - m_multisample_state.rasterizationSamples = static_cast(multisamples); - m_multisample_state.sampleShadingEnable = per_sample_shading; - m_multisample_state.minSampleShading = (multisamples > 1) ? 
1.0f : 0.0f; -} - -void GraphicsPipelineBuilder::SetNoCullRasterizationState() -{ - SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); -} - -void GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, VkCompareOp compare_op) -{ - m_depth_state.depthTestEnable = depth_test; - m_depth_state.depthWriteEnable = depth_write; - m_depth_state.depthCompareOp = compare_op; - - m_ci.pDepthStencilState = &m_depth_state; -} - -void GraphicsPipelineBuilder::SetNoDepthTestState() -{ - SetDepthState(false, false, VK_COMPARE_OP_ALWAYS); -} - -void GraphicsPipelineBuilder::SetBlendConstants(float r, float g, float b, float a) -{ - m_blend_state.blendConstants[0] = r; - m_blend_state.blendConstants[1] = g; - m_blend_state.blendConstants[2] = b; - m_blend_state.blendConstants[3] = a; - m_ci.pColorBlendState = &m_blend_state; -} - -void GraphicsPipelineBuilder::AddBlendAttachment( - bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, VkBlendOp op, - VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op, VkColorComponentFlags write_mask /* = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT */) -{ - Assert(m_blend_state.attachmentCount < MAX_ATTACHMENTS); - - VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[m_blend_state.attachmentCount]; - bs.blendEnable = blend_enable; - bs.srcColorBlendFactor = src_factor; - bs.dstColorBlendFactor = dst_factor; - bs.colorBlendOp = op; - bs.srcAlphaBlendFactor = alpha_src_factor; - bs.dstAlphaBlendFactor = alpha_dst_factor; - bs.alphaBlendOp = alpha_op; - bs.colorWriteMask = write_mask; - - m_blend_state.attachmentCount++; - m_blend_state.pAttachments = m_blend_attachments.data(); - m_ci.pColorBlendState = &m_blend_state; -} - -void GraphicsPipelineBuilder::SetBlendAttachment( - u32 attachment, bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, VkBlendOp op, - 
VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op, VkColorComponentFlags write_mask /*= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT*/) -{ - Assert(attachment < MAX_ATTACHMENTS); - - VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[attachment]; - bs.blendEnable = blend_enable; - bs.srcColorBlendFactor = src_factor; - bs.dstColorBlendFactor = dst_factor; - bs.colorBlendOp = op; - bs.srcAlphaBlendFactor = alpha_src_factor; - bs.dstAlphaBlendFactor = alpha_dst_factor; - bs.alphaBlendOp = alpha_op; - bs.colorWriteMask = write_mask; - - if (attachment >= m_blend_state.attachmentCount) - { - m_blend_state.attachmentCount = attachment + 1u; - m_blend_state.pAttachments = m_blend_attachments.data(); - m_ci.pColorBlendState = &m_blend_state; - } -} - -void GraphicsPipelineBuilder::ClearBlendAttachments() -{ - m_blend_attachments = {}; - m_blend_state.attachmentCount = 0; -} - -void GraphicsPipelineBuilder::SetNoBlendingState() -{ - ClearBlendAttachments(); - SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE, - VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, - VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT); -} - -void GraphicsPipelineBuilder::AddDynamicState(VkDynamicState state) -{ - Assert(m_dynamic_state.dynamicStateCount < MAX_DYNAMIC_STATE); - - m_dynamic_state_values[m_dynamic_state.dynamicStateCount] = state; - m_dynamic_state.dynamicStateCount++; - m_dynamic_state.pDynamicStates = m_dynamic_state_values.data(); - m_ci.pDynamicState = &m_dynamic_state; -} - -void GraphicsPipelineBuilder::SetDynamicViewportAndScissorState() -{ - AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT); - AddDynamicState(VK_DYNAMIC_STATE_SCISSOR); -} - -void GraphicsPipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, float max_depth) -{ - 
m_viewport.x = x; - m_viewport.y = y; - m_viewport.width = width; - m_viewport.height = height; - m_viewport.minDepth = min_depth; - m_viewport.maxDepth = max_depth; - - m_viewport_state.pViewports = &m_viewport; - m_viewport_state.viewportCount = 1u; - m_ci.pViewportState = &m_viewport_state; -} - -void GraphicsPipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) -{ - m_scissor.offset.x = x; - m_scissor.offset.y = y; - m_scissor.extent.width = width; - m_scissor.extent.height = height; - - m_viewport_state.pScissors = &m_scissor; - m_viewport_state.scissorCount = 1u; - m_ci.pViewportState = &m_viewport_state; -} - -void GraphicsPipelineBuilder::SetMultisamples(VkSampleCountFlagBits samples) -{ - m_multisample_state.rasterizationSamples = samples; - m_ci.pMultisampleState = &m_multisample_state; -} - -void GraphicsPipelineBuilder::SetPipelineLayout(VkPipelineLayout layout) -{ - m_ci.layout = layout; -} - -void GraphicsPipelineBuilder::SetRenderPass(VkRenderPass render_pass, u32 subpass) -{ - m_ci.renderPass = render_pass; - m_ci.subpass = subpass; -} - -SamplerBuilder::SamplerBuilder() -{ - Clear(); -} - -void SamplerBuilder::Clear() -{ - m_ci = {}; - m_ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; -} - -VkSampler SamplerBuilder::Create(VkDevice device, bool clear /* = true */) -{ - VkSampler sampler; - VkResult res = vkCreateSampler(device, &m_ci, nullptr, &sampler); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateSampler() failed: "); - return VK_NULL_HANDLE; - } - - return sampler; -} - -void SamplerBuilder::SetFilter(VkFilter mag_filter, VkFilter min_filter, VkSamplerMipmapMode mip_filter) -{ - m_ci.magFilter = mag_filter; - m_ci.minFilter = min_filter; - m_ci.mipmapMode = mip_filter; -} - -void SamplerBuilder::SetAddressMode(VkSamplerAddressMode u, VkSamplerAddressMode v, VkSamplerAddressMode w) -{ - m_ci.addressModeU = u; - m_ci.addressModeV = v; - m_ci.addressModeW = w; -} - -void SamplerBuilder::SetBorderColor(VkBorderColor 
color) -{ - m_ci.borderColor = color; -} - -void SamplerBuilder::SetPointSampler(VkSamplerAddressMode address_mode /* = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER */) -{ - Clear(); - SetFilter(VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST); - SetAddressMode(address_mode, address_mode, address_mode); -} - -void SamplerBuilder::SetLinearSampler(bool mipmaps, - VkSamplerAddressMode address_mode /* = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER */) -{ - Clear(); - SetFilter(VK_FILTER_LINEAR, VK_FILTER_LINEAR, - mipmaps ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST); - SetAddressMode(address_mode, address_mode, address_mode); - - if (mipmaps) - { - m_ci.minLod = std::numeric_limits::min(); - m_ci.maxLod = std::numeric_limits::max(); - } -} - -DescriptorSetUpdateBuilder::DescriptorSetUpdateBuilder() -{ - Clear(); -} - -void DescriptorSetUpdateBuilder::Clear() -{ - m_writes = {}; - m_num_writes = 0; -} - -void DescriptorSetUpdateBuilder::Update(VkDevice device, bool clear /*= true*/) -{ - Assert(m_num_writes > 0); - - vkUpdateDescriptorSets(device, m_num_writes, (m_num_writes > 0) ? 
m_writes.data() : nullptr, 0, nullptr); - - if (clear) - Clear(); -} - -void DescriptorSetUpdateBuilder::AddImageDescriptorWrite( - VkDescriptorSet set, u32 binding, VkImageView view, - VkImageLayout layout /*= VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/) -{ - Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS); - - VkDescriptorImageInfo& ii = m_infos[m_num_infos++].image; - ii.imageView = view; - ii.imageLayout = layout; - ii.sampler = VK_NULL_HANDLE; - - VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; - dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - dw.dstSet = set; - dw.dstBinding = binding; - dw.descriptorCount = 1; - dw.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - dw.pImageInfo = ⅈ -} - -void DescriptorSetUpdateBuilder::AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler) -{ - Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS); - - VkDescriptorImageInfo& ii = m_infos[m_num_infos++].image; - ii.imageView = VK_NULL_HANDLE; - ii.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - ii.sampler = sampler; - - VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; - dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - dw.dstSet = set; - dw.dstBinding = binding; - dw.descriptorCount = 1; - dw.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - dw.pImageInfo = ⅈ -} - -void DescriptorSetUpdateBuilder::AddCombinedImageSamplerDescriptorWrite( - VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler, - VkImageLayout layout /*= VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/) -{ - Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS); - - VkDescriptorImageInfo& ii = m_infos[m_num_infos++].image; - ii.imageView = view; - ii.imageLayout = layout; - ii.sampler = sampler; - - VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; - dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - dw.dstSet = set; - dw.dstBinding = binding; - dw.descriptorCount = 1; - dw.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - 
dw.pImageInfo = ⅈ -} - -void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, - VkBuffer buffer, u32 offset, u32 size) -{ - Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS); - - VkDescriptorBufferInfo& bi = m_infos[m_num_infos++].buffer; - bi.buffer = buffer; - bi.offset = offset; - bi.range = size; - - VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; - dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - dw.dstSet = set; - dw.dstBinding = binding; - dw.descriptorCount = 1; - dw.descriptorType = dtype; - dw.pBufferInfo = &bi; -} - -void DescriptorSetUpdateBuilder::AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, - VkBufferView view) -{ - Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS); - - VkBufferView& bi = m_infos[m_num_infos++].buffer_view; - bi = view; - - VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; - dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - dw.dstSet = set; - dw.dstBinding = binding; - dw.descriptorCount = 1; - dw.descriptorType = dtype; - dw.pTexelBufferView = &bi; -} - -FramebufferBuilder::FramebufferBuilder() -{ - Clear(); -} - -void FramebufferBuilder::Clear() -{ - m_ci = {}; - m_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - m_images = {}; -} - -VkFramebuffer FramebufferBuilder::Create(VkDevice device, bool clear /*= true*/) -{ - VkFramebuffer fb; - VkResult res = vkCreateFramebuffer(device, &m_ci, nullptr, &fb); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer() failed: "); - return VK_NULL_HANDLE; - } - - if (clear) - Clear(); - - return fb; -} - -void FramebufferBuilder::AddAttachment(VkImageView image) -{ - Assert(m_ci.attachmentCount < MAX_ATTACHMENTS); - - m_images[m_ci.attachmentCount] = image; - - m_ci.attachmentCount++; - m_ci.pAttachments = m_images.data(); -} - -void FramebufferBuilder::SetSize(u32 width, u32 height, u32 layers) -{ - m_ci.width = width; - 
m_ci.height = height; - m_ci.layers = layers; -} - -void FramebufferBuilder::SetRenderPass(VkRenderPass render_pass) -{ - m_ci.renderPass = render_pass; -} - -RenderPassBuilder::RenderPassBuilder() -{ - Clear(); -} - -void RenderPassBuilder::Clear() -{ - m_ci = {}; - m_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - m_attachments = {}; - m_attachment_references = {}; - m_num_attachment_references = 0; - m_subpasses = {}; -} - -VkRenderPass RenderPassBuilder::Create(VkDevice device, bool clear /*= true*/) -{ - VkRenderPass rp; - VkResult res = vkCreateRenderPass(device, &m_ci, nullptr, &rp); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateRenderPass() failed: "); - return VK_NULL_HANDLE; - } - - return rp; -} - -u32 RenderPassBuilder::AddAttachment(VkFormat format, VkSampleCountFlagBits samples, VkAttachmentLoadOp load_op, - VkAttachmentStoreOp store_op, VkImageLayout initial_layout, - VkImageLayout final_layout) -{ - Assert(m_ci.attachmentCount < MAX_ATTACHMENTS); - - const u32 index = m_ci.attachmentCount; - VkAttachmentDescription& ad = m_attachments[index]; - ad.format = format; - ad.samples = samples; - ad.loadOp = load_op; - ad.storeOp = store_op; - ad.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - ad.initialLayout = initial_layout; - ad.finalLayout = final_layout; - - m_ci.attachmentCount++; - m_ci.pAttachments = m_attachments.data(); - - return index; -} - -u32 RenderPassBuilder::AddSubpass() -{ - Assert(m_ci.subpassCount < MAX_SUBPASSES); - - const u32 index = m_ci.subpassCount; - VkSubpassDescription& sp = m_subpasses[index]; - sp.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - - m_ci.subpassCount++; - m_ci.pSubpasses = m_subpasses.data(); - - return index; -} - -void RenderPassBuilder::AddSubpassColorAttachment(u32 subpass, u32 attachment, VkImageLayout layout) -{ - Assert(subpass < m_ci.subpassCount && m_num_attachment_references < MAX_ATTACHMENT_REFERENCES); - - 
VkAttachmentReference& ar = m_attachment_references[m_num_attachment_references++]; - ar.attachment = attachment; - ar.layout = layout; - - VkSubpassDescription& sp = m_subpasses[subpass]; - if (sp.colorAttachmentCount == 0) - sp.pColorAttachments = &ar; - sp.colorAttachmentCount++; -} - -void RenderPassBuilder::AddSubpassDepthAttachment(u32 subpass, u32 attachment, VkImageLayout layout) -{ - Assert(subpass < m_ci.subpassCount && m_num_attachment_references < MAX_ATTACHMENT_REFERENCES); - - VkAttachmentReference& ar = m_attachment_references[m_num_attachment_references++]; - ar.attachment = attachment; - ar.layout = layout; - - VkSubpassDescription& sp = m_subpasses[subpass]; - sp.pDepthStencilAttachment = &ar; -} - -BufferViewBuilder::BufferViewBuilder() -{ - Clear(); -} - -void BufferViewBuilder::Clear() -{ - m_ci = {}; - m_ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; -} - -VkBufferView BufferViewBuilder::Create(VkDevice device, bool clear /*= true*/) -{ - VkBufferView bv; - VkResult res = vkCreateBufferView(device, &m_ci, nullptr, &bv); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateBufferView() failed: "); - return VK_NULL_HANDLE; - } - - return bv; -} - -void BufferViewBuilder::Set(VkBuffer buffer, VkFormat format, u32 offset, u32 size) -{ - m_ci.buffer = buffer; - m_ci.format = format; - m_ci.offset = offset; - m_ci.range = size; -} - -} // namespace Vulkan \ No newline at end of file diff --git a/src/common/vulkan/builders.h b/src/common/vulkan/builders.h deleted file mode 100644 index b2f6642d6..000000000 --- a/src/common/vulkan/builders.h +++ /dev/null @@ -1,274 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../types.h" -#include "loader.h" -#include - -namespace Vulkan { - -class DescriptorSetLayoutBuilder -{ -public: - enum : u32 - { - MAX_BINDINGS = 16, - }; - - DescriptorSetLayoutBuilder(); - - void Clear(); - - 
VkDescriptorSetLayout Create(VkDevice device); - - void AddBinding(u32 binding, VkDescriptorType dtype, u32 dcount, VkShaderStageFlags stages); - -private: - VkDescriptorSetLayoutCreateInfo m_ci{}; - std::array m_bindings{}; -}; - -class PipelineLayoutBuilder -{ -public: - enum : u32 - { - MAX_SETS = 8, - MAX_PUSH_CONSTANTS = 1 - }; - - PipelineLayoutBuilder(); - - void Clear(); - - VkPipelineLayout Create(VkDevice device); - - void AddDescriptorSet(VkDescriptorSetLayout layout); - - void AddPushConstants(VkShaderStageFlags stages, u32 offset, u32 size); - -private: - VkPipelineLayoutCreateInfo m_ci{}; - std::array m_sets{}; - std::array m_push_constants{}; -}; - -class GraphicsPipelineBuilder -{ -public: - enum : u32 - { - MAX_SHADER_STAGES = 3, - MAX_VERTEX_ATTRIBUTES = 16, - MAX_VERTEX_BUFFERS = 8, - MAX_ATTACHMENTS = 2, - MAX_DYNAMIC_STATE = 8 - }; - - GraphicsPipelineBuilder(); - - void Clear(); - - VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true); - - void SetShaderStage(VkShaderStageFlagBits stage, VkShaderModule module, const char* entry_point); - void SetVertexShader(VkShaderModule module) { SetShaderStage(VK_SHADER_STAGE_VERTEX_BIT, module, "main"); } - void SetGeometryShader(VkShaderModule module) { SetShaderStage(VK_SHADER_STAGE_GEOMETRY_BIT, module, "main"); } - void SetFragmentShader(VkShaderModule module) { SetShaderStage(VK_SHADER_STAGE_FRAGMENT_BIT, module, "main"); } - - void AddVertexBuffer(u32 binding, u32 stride, VkVertexInputRate input_rate = VK_VERTEX_INPUT_RATE_VERTEX); - void AddVertexAttribute(u32 location, u32 binding, VkFormat format, u32 offset); - - void SetPrimitiveTopology(VkPrimitiveTopology topology, bool enable_primitive_restart = false); - - void SetRasterizationState(VkPolygonMode polygon_mode, VkCullModeFlags cull_mode, VkFrontFace front_face); - void SetLineWidth(float width); - void SetMultisamples(u32 multisamples, bool per_sample_shading); - void 
SetNoCullRasterizationState(); - - void SetDepthState(bool depth_test, bool depth_write, VkCompareOp compare_op); - void SetNoDepthTestState(); - - void AddBlendAttachment(bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, VkBlendOp op, - VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op, - VkColorComponentFlags write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT); - void SetBlendAttachment(u32 attachment, bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, - VkBlendOp op, VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, - VkBlendOp alpha_op, - VkColorComponentFlags write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT); - void ClearBlendAttachments(); - - void SetBlendConstants(float r, float g, float b, float a); - void SetNoBlendingState(); - - void AddDynamicState(VkDynamicState state); - - void SetDynamicViewportAndScissorState(); - void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth); - void SetScissorRect(s32 x, s32 y, u32 width, u32 height); - - void SetMultisamples(VkSampleCountFlagBits samples); - - void SetPipelineLayout(VkPipelineLayout layout); - void SetRenderPass(VkRenderPass render_pass, u32 subpass); - -private: - VkGraphicsPipelineCreateInfo m_ci; - std::array m_shader_stages; - - VkPipelineVertexInputStateCreateInfo m_vertex_input_state; - std::array m_vertex_buffers; - std::array m_vertex_attributes; - - VkPipelineInputAssemblyStateCreateInfo m_input_assembly; - - VkPipelineRasterizationStateCreateInfo m_rasterization_state; - VkPipelineDepthStencilStateCreateInfo m_depth_state; - - VkPipelineColorBlendStateCreateInfo m_blend_state; - std::array m_blend_attachments; - - VkPipelineViewportStateCreateInfo m_viewport_state; - VkViewport m_viewport; - VkRect2D m_scissor; - - 
VkPipelineDynamicStateCreateInfo m_dynamic_state; - std::array m_dynamic_state_values; - - VkPipelineMultisampleStateCreateInfo m_multisample_state; -}; - -class SamplerBuilder -{ -public: - SamplerBuilder(); - - void Clear(); - - VkSampler Create(VkDevice device, bool clear = true); - - void SetFilter(VkFilter mag_filter, VkFilter min_filter, VkSamplerMipmapMode mip_filter); - void SetAddressMode(VkSamplerAddressMode u, VkSamplerAddressMode v, VkSamplerAddressMode w); - void SetBorderColor(VkBorderColor color); - - void SetPointSampler(VkSamplerAddressMode address_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - void SetLinearSampler(bool mipmaps, VkSamplerAddressMode address_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - -private: - VkSamplerCreateInfo m_ci; -}; - -class DescriptorSetUpdateBuilder -{ - enum : u32 - { - MAX_WRITES = 16, - MAX_INFOS = 16, - }; - -public: - DescriptorSetUpdateBuilder(); - - void Clear(); - - void Update(VkDevice device, bool clear = true); - - void AddImageDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, - VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler); - void AddCombinedImageSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler, - VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer, u32 offset, - u32 size); - void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view); - -private: - union InfoUnion - { - VkDescriptorBufferInfo buffer; - VkDescriptorImageInfo image; - VkBufferView buffer_view; - }; - - std::array m_writes; - u32 m_num_writes = 0; - - std::array m_infos; - u32 m_num_infos = 0; -}; - -class FramebufferBuilder -{ - enum : u32 - { - MAX_ATTACHMENTS = 2, - }; - -public: - 
FramebufferBuilder(); - - void Clear(); - - VkFramebuffer Create(VkDevice device, bool clear = true); - - void AddAttachment(VkImageView image); - - void SetSize(u32 width, u32 height, u32 layers); - - void SetRenderPass(VkRenderPass render_pass); - -private: - VkFramebufferCreateInfo m_ci; - std::array m_images; -}; - -class RenderPassBuilder -{ - enum : u32 - { - MAX_ATTACHMENTS = 2, - MAX_ATTACHMENT_REFERENCES = 2, - MAX_SUBPASSES = 1, - }; - -public: - RenderPassBuilder(); - - void Clear(); - - VkRenderPass Create(VkDevice device, bool clear = true); - - u32 AddAttachment(VkFormat format, VkSampleCountFlagBits samples, VkAttachmentLoadOp load_op, - VkAttachmentStoreOp store_op, VkImageLayout initial_layout, VkImageLayout final_layout); - - u32 AddSubpass(); - void AddSubpassColorAttachment(u32 subpass, u32 attachment, VkImageLayout layout); - void AddSubpassDepthAttachment(u32 subpass, u32 attachment, VkImageLayout layout); - -private: - VkRenderPassCreateInfo m_ci; - std::array m_attachments; - std::array m_attachment_references; - u32 m_num_attachment_references = 0; - std::array m_subpasses; -}; - -class BufferViewBuilder -{ -public: - BufferViewBuilder(); - - void Clear(); - - VkBufferView Create(VkDevice device, bool clear = true); - - void Set(VkBuffer buffer, VkFormat format, u32 offset, u32 size); - -private: - VkBufferViewCreateInfo m_ci; -}; - -} // namespace Vulkan \ No newline at end of file diff --git a/src/common/vulkan/context.cpp b/src/common/vulkan/context.cpp deleted file mode 100644 index bcdb6669a..000000000 --- a/src/common/vulkan/context.cpp +++ /dev/null @@ -1,1403 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "context.h" -#include "../assert.h" -#include "../log.h" -#include "../string_util.h" -#include "../window_info.h" -#include "swap_chain.h" -#include "util.h" -#include -#include -#include -Log_SetChannel(Vulkan::Context); - -std::unique_ptr 
g_vulkan_context; - -enum : u32 -{ - TEXTURE_BUFFER_SIZE = 32 * 1024 * 1024, -}; - -Vulkan::Context::Context(VkInstance instance, VkPhysicalDevice physical_device, bool owns_device) - : m_instance(instance), m_physical_device(physical_device) -{ - // Read device physical memory properties, we need it for allocating buffers - vkGetPhysicalDeviceProperties(physical_device, &m_device_properties); - vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties); - - // Would any drivers be this silly? I hope not... - m_device_properties.limits.minUniformBufferOffsetAlignment = - std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast(1)); - m_device_properties.limits.minTexelBufferOffsetAlignment = - std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast(1)); - m_device_properties.limits.optimalBufferCopyOffsetAlignment = - std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast(1)); - m_device_properties.limits.optimalBufferCopyRowPitchAlignment = - std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast(1)); -} - -Vulkan::Context::~Context() = default; - -bool Vulkan::Context::CheckValidationLayerAvailablility() -{ - u32 extension_count = 0; - VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); - return false; - } - - std::vector extension_list(extension_count); - res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, extension_list.data()); - Assert(res == VK_SUCCESS); - - u32 layer_count = 0; - res = vkEnumerateInstanceLayerProperties(&layer_count, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); - return false; - } - - std::vector layer_list(layer_count); - res = vkEnumerateInstanceLayerProperties(&layer_count, 
layer_list.data()); - Assert(res == VK_SUCCESS); - - // Check for both VK_EXT_debug_utils and VK_LAYER_LUNARG_standard_validation - return (std::find_if(extension_list.begin(), extension_list.end(), - [](const auto& it) { - return strcmp(it.extensionName, VK_EXT_DEBUG_UTILS_EXTENSION_NAME) == 0; - }) != extension_list.end() && - std::find_if(layer_list.begin(), layer_list.end(), [](const auto& it) { - return strcmp(it.layerName, "VK_LAYER_KHRONOS_validation") == 0; - }) != layer_list.end()); -} - -VkInstance Vulkan::Context::CreateVulkanInstance(const WindowInfo* wi, bool enable_debug_utils, - bool enable_validation_layer) -{ - ExtensionList enabled_extensions; - if (!SelectInstanceExtensions(&enabled_extensions, wi, enable_debug_utils)) - return VK_NULL_HANDLE; - - VkApplicationInfo app_info = {}; - app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - app_info.pNext = nullptr; - app_info.pApplicationName = "DuckStation"; - app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - app_info.pEngineName = "DuckStation"; - app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - app_info.apiVersion = VK_API_VERSION_1_1; - - VkInstanceCreateInfo instance_create_info = {}; - instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - instance_create_info.pNext = nullptr; - instance_create_info.flags = 0; - instance_create_info.pApplicationInfo = &app_info; - instance_create_info.enabledExtensionCount = static_cast(enabled_extensions.size()); - instance_create_info.ppEnabledExtensionNames = enabled_extensions.data(); - instance_create_info.enabledLayerCount = 0; - instance_create_info.ppEnabledLayerNames = nullptr; - - // Enable debug layer on debug builds - if (enable_validation_layer) - { - static const char* layer_names[] = {"VK_LAYER_KHRONOS_validation"}; - instance_create_info.enabledLayerCount = 1; - instance_create_info.ppEnabledLayerNames = layer_names; - } - - VkInstance instance; - VkResult res = vkCreateInstance(&instance_create_info, nullptr, 
&instance); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateInstance failed: "); - return nullptr; - } - - return instance; -} - -bool Vulkan::Context::SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo* wi, - bool enable_debug_utils) -{ - u32 extension_count = 0; - VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); - return false; - } - - if (extension_count == 0) - { - Log_ErrorPrintf("Vulkan: No extensions supported by instance."); - return false; - } - - std::vector available_extension_list(extension_count); - res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, available_extension_list.data()); - Assert(res == VK_SUCCESS); - - for (const auto& extension_properties : available_extension_list) - Log_InfoPrintf("Available extension: %s", extension_properties.extensionName); - - auto SupportsExtension = [&](const char* name, bool required) { - if (std::find_if(available_extension_list.begin(), available_extension_list.end(), - [&](const VkExtensionProperties& properties) { - return !strcmp(name, properties.extensionName); - }) != available_extension_list.end()) - { - Log_InfoPrintf("Enabling extension: %s", name); - extension_list->push_back(name); - return true; - } - - if (required) - Log_ErrorPrintf("Vulkan: Missing required extension %s.", name); - - return false; - }; - - // Common extensions - if (wi && wi->type != WindowInfo::Type::Surfaceless && !SupportsExtension(VK_KHR_SURFACE_EXTENSION_NAME, true)) - return false; - -#if defined(VK_USE_PLATFORM_WIN32_KHR) - if (wi && wi->type == WindowInfo::Type::Win32 && !SupportsExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true)) - return false; -#endif -#if defined(VK_USE_PLATFORM_XLIB_KHR) - if (wi && wi->type == WindowInfo::Type::X11 && !SupportsExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, true)) - return false; 
-#endif -#if defined(VK_USE_PLATFORM_WAYLAND_KHR) - if (wi && wi->type == WindowInfo::Type::Wayland && !SupportsExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true)) - return false; -#endif -#if defined(VK_USE_PLATFORM_ANDROID_KHR) - if (wi && wi->type == WindowInfo::Type::Android && !SupportsExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true)) - return false; -#endif -#if defined(VK_USE_PLATFORM_METAL_EXT) - if (wi && wi->type == WindowInfo::Type::MacOS && !SupportsExtension(VK_EXT_METAL_SURFACE_EXTENSION_NAME, true)) - return false; -#endif - - if (wi && wi->type == WindowInfo::Type::Display && !SupportsExtension(VK_KHR_DISPLAY_EXTENSION_NAME, true)) - return false; - - // VK_EXT_debug_utils - if (enable_debug_utils && !SupportsExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false)) - Log_WarningPrintf("Vulkan: Debug report requested, but extension is not available."); - - return true; -} - -Vulkan::Context::GPUList Vulkan::Context::EnumerateGPUs(VkInstance instance) -{ - u32 gpu_count = 0; - VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr); - if (res != VK_SUCCESS || gpu_count == 0) - { - LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: "); - return {}; - } - - GPUList gpus; - gpus.resize(gpu_count); - - res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data()); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: "); - return {}; - } - - return gpus; -} - -Vulkan::Context::GPUNameList Vulkan::Context::EnumerateGPUNames(VkInstance instance) -{ - u32 gpu_count = 0; - VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr); - if (res != VK_SUCCESS || gpu_count == 0) - { - LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: "); - return {}; - } - - GPUList gpus; - gpus.resize(gpu_count); - - res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data()); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: "); - return {}; 
- } - - GPUNameList gpu_names; - gpu_names.reserve(gpu_count); - for (u32 i = 0; i < gpu_count; i++) - { - VkPhysicalDeviceProperties props = {}; - vkGetPhysicalDeviceProperties(gpus[i], &props); - - std::string gpu_name(props.deviceName); - - // handle duplicate adapter names - if (std::any_of(gpu_names.begin(), gpu_names.end(), - [&gpu_name](const std::string& other) { return (gpu_name == other); })) - { - std::string original_adapter_name = std::move(gpu_name); - - u32 current_extra = 2; - do - { - gpu_name = StringUtil::StdStringFromFormat("%s (%u)", original_adapter_name.c_str(), current_extra); - current_extra++; - } while (std::any_of(gpu_names.begin(), gpu_names.end(), - [&gpu_name](const std::string& other) { return (gpu_name == other); })); - } - - gpu_names.push_back(std::move(gpu_name)); - } - - return gpu_names; -} - -bool Vulkan::Context::Create(std::string_view gpu_name, const WindowInfo* wi, - std::unique_ptr* out_swap_chain, bool threaded_presentation, - bool enable_debug_utils, bool enable_validation_layer, bool vsync) -{ - AssertMsg(!g_vulkan_context, "Has no current context"); - - if (!Vulkan::LoadVulkanLibrary()) - { - Log_ErrorPrintf("Failed to load Vulkan library"); - return false; - } - - const bool enable_surface = (wi && wi->type != WindowInfo::Type::Surfaceless); - VkInstance instance = CreateVulkanInstance(wi, enable_debug_utils, enable_validation_layer); - if (instance == VK_NULL_HANDLE) - { - Vulkan::UnloadVulkanLibrary(); - return false; - } - - if (!Vulkan::LoadVulkanInstanceFunctions(instance)) - { - Log_ErrorPrintf("Failed to load Vulkan instance functions"); - vkDestroyInstance(instance, nullptr); - Vulkan::UnloadVulkanLibrary(); - return false; - } - - GPUList gpus = EnumerateGPUs(instance); - if (gpus.empty()) - { - vkDestroyInstance(instance, nullptr); - Vulkan::UnloadVulkanLibrary(); - return false; - } - - u32 gpu_index = 0; - GPUNameList gpu_names = EnumerateGPUNames(instance); - if (!gpu_name.empty()) - { - for (; gpu_index 
< static_cast(gpu_names.size()); gpu_index++) - { - Log_InfoPrintf("GPU %u: %s", static_cast(gpu_index), gpu_names[gpu_index].c_str()); - if (gpu_names[gpu_index] == gpu_name) - break; - } - - if (gpu_index == static_cast(gpu_names.size())) - { - Log_WarningPrintf("Requested GPU '%s' not found, using first (%s)", std::string(gpu_name).c_str(), - gpu_names[0].c_str()); - gpu_index = 0; - } - } - else - { - Log_InfoPrintf("No GPU requested, using first (%s)", gpu_names[0].c_str()); - } - - VkSurfaceKHR surface = VK_NULL_HANDLE; - WindowInfo wi_copy; - if (wi) - wi_copy = *wi; - - if (enable_surface && - (surface = SwapChain::CreateVulkanSurface(instance, gpus[gpu_index], &wi_copy)) == VK_NULL_HANDLE) - { - vkDestroyInstance(instance, nullptr); - Vulkan::UnloadVulkanLibrary(); - return false; - } - - g_vulkan_context.reset(new Context(instance, gpus[gpu_index], true)); - - // Enable debug reports if the "Host GPU" log category is enabled. - if (enable_debug_utils) - g_vulkan_context->EnableDebugUtils(); - - // Attempt to create the device. - if (!g_vulkan_context->CreateDevice(surface, enable_validation_layer, nullptr, 0, nullptr, 0, nullptr) || - !g_vulkan_context->CreateAllocator() || !g_vulkan_context->CreateGlobalDescriptorPool() || - !g_vulkan_context->CreateQueryPool() || !g_vulkan_context->CreateCommandBuffers() || - !g_vulkan_context->CreateTextureStreamBuffer() || - (enable_surface && (*out_swap_chain = SwapChain::Create(wi_copy, surface, vsync)) == nullptr)) - { - // Since we are destroying the instance, we're also responsible for destroying the surface. 
- if (surface != VK_NULL_HANDLE) - vkDestroySurfaceKHR(instance, surface, nullptr); - - g_vulkan_context.reset(); - return false; - } - - if (threaded_presentation) - g_vulkan_context->StartPresentThread(); - - return true; -} - -void Vulkan::Context::Destroy() -{ - AssertMsg(g_vulkan_context, "Has context"); - - g_vulkan_context->StopPresentThread(); - - if (g_vulkan_context->m_device != VK_NULL_HANDLE) - g_vulkan_context->WaitForGPUIdle(); - - g_vulkan_context->m_texture_upload_buffer.Destroy(false); - - g_vulkan_context->DestroyRenderPassCache(); - g_vulkan_context->DestroyQueryPool(); - g_vulkan_context->DestroyGlobalDescriptorPool(); - g_vulkan_context->DestroyCommandBuffers(); - g_vulkan_context->DestroyAllocator(); - - if (g_vulkan_context->m_device != VK_NULL_HANDLE) - vkDestroyDevice(g_vulkan_context->m_device, nullptr); - - if (g_vulkan_context->m_debug_messenger_callback != VK_NULL_HANDLE) - g_vulkan_context->DisableDebugUtils(); - - if (g_vulkan_context->m_instance != VK_NULL_HANDLE) - vkDestroyInstance(g_vulkan_context->m_instance, nullptr); - - Vulkan::UnloadVulkanLibrary(); - - g_vulkan_context.reset(); -} - -bool Vulkan::Context::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface) -{ - u32 extension_count = 0; - VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: "); - return false; - } - - if (extension_count == 0) - { - Log_ErrorPrintf("Vulkan: No extensions supported by device."); - return false; - } - - std::vector available_extension_list(extension_count); - res = - vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, available_extension_list.data()); - Assert(res == VK_SUCCESS); - - for (const auto& extension_properties : available_extension_list) - Log_InfoPrintf("Available extension: %s", extension_properties.extensionName); - - auto 
SupportsExtension = [&](const char* name, bool required) { - if (std::find_if(available_extension_list.begin(), available_extension_list.end(), - [&](const VkExtensionProperties& properties) { - return !strcmp(name, properties.extensionName); - }) != available_extension_list.end()) - { - if (std::none_of(extension_list->begin(), extension_list->end(), - [&](const char* existing_name) { return (std::strcmp(existing_name, name) == 0); })) - { - Log_InfoPrintf("Enabling extension: %s", name); - extension_list->push_back(name); - } - - return true; - } - - if (required) - Log_ErrorPrintf("Vulkan: Missing required extension %s.", name); - - return false; - }; - - if (enable_surface && !SupportsExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true)) - return false; - - m_optional_extensions.vk_ext_memory_budget = SupportsExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false); - m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false); - - return true; -} - -bool Vulkan::Context::SelectDeviceFeatures(const VkPhysicalDeviceFeatures* required_features) -{ - VkPhysicalDeviceFeatures available_features; - vkGetPhysicalDeviceFeatures(m_physical_device, &available_features); - - if (required_features) - std::memcpy(&m_device_features, required_features, sizeof(m_device_features)); - - // Enable the features we use. 
- m_device_features.dualSrcBlend = available_features.dualSrcBlend; - m_device_features.sampleRateShading = available_features.sampleRateShading; - return true; -} - -bool Vulkan::Context::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, - const char** required_device_extensions, u32 num_required_device_extensions, - const char** required_device_layers, u32 num_required_device_layers, - const VkPhysicalDeviceFeatures* required_features) -{ - u32 queue_family_count; - vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr); - if (queue_family_count == 0) - { - Log_ErrorPrintf("No queue families found on specified vulkan physical device."); - return false; - } - - std::vector queue_family_properties(queue_family_count); - vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, queue_family_properties.data()); - Log_InfoPrintf("%u vulkan queue families", queue_family_count); - - // Find graphics and present queues. - m_graphics_queue_family_index = queue_family_count; - m_present_queue_family_index = queue_family_count; - for (uint32_t i = 0; i < queue_family_count; i++) - { - VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT; - if (graphics_supported) - { - m_graphics_queue_family_index = i; - // Quit now, no need for a present queue. - if (!surface) - { - break; - } - } - - if (surface) - { - VkBool32 present_supported; - VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); - return false; - } - - if (present_supported) - { - m_present_queue_family_index = i; - } - - // Prefer one queue family index that does both graphics and present. 
- if (graphics_supported && present_supported) - { - break; - } - } - } - if (m_graphics_queue_family_index == queue_family_count) - { - Log_ErrorPrintf("Vulkan: Failed to find an acceptable graphics queue."); - return false; - } - if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count) - { - Log_ErrorPrintf("Vulkan: Failed to find an acceptable present queue."); - return false; - } - - VkDeviceCreateInfo device_info = {}; - device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - device_info.pNext = nullptr; - device_info.flags = 0; - - static constexpr float queue_priorities[] = {1.0f}; - VkDeviceQueueCreateInfo graphics_queue_info = {}; - graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - graphics_queue_info.pNext = nullptr; - graphics_queue_info.flags = 0; - graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index; - graphics_queue_info.queueCount = 1; - graphics_queue_info.pQueuePriorities = queue_priorities; - - VkDeviceQueueCreateInfo present_queue_info = {}; - present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - present_queue_info.pNext = nullptr; - present_queue_info.flags = 0; - present_queue_info.queueFamilyIndex = m_present_queue_family_index; - present_queue_info.queueCount = 1; - present_queue_info.pQueuePriorities = queue_priorities; - - std::array queue_infos = {{ - graphics_queue_info, - present_queue_info, - }}; - - device_info.queueCreateInfoCount = 1; - if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index) - { - device_info.queueCreateInfoCount = 2; - } - device_info.pQueueCreateInfos = queue_infos.data(); - - ExtensionList enabled_extensions; - for (u32 i = 0; i < num_required_device_extensions; i++) - enabled_extensions.emplace_back(required_device_extensions[i]); - if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE)) - return false; - - device_info.enabledLayerCount = 
num_required_device_layers; - device_info.ppEnabledLayerNames = required_device_layers; - device_info.enabledExtensionCount = static_cast(enabled_extensions.size()); - device_info.ppEnabledExtensionNames = enabled_extensions.data(); - - // Check for required features before creating. - if (!SelectDeviceFeatures(required_features)) - return false; - - device_info.pEnabledFeatures = &m_device_features; - - // Enable debug layer on debug builds - if (enable_validation_layer) - { - static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"}; - device_info.enabledLayerCount = 1; - device_info.ppEnabledLayerNames = layer_names; - } - - VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDevice failed: "); - return false; - } - - // With the device created, we can fill the remaining entry points. - if (!LoadVulkanDeviceFunctions(m_device)) - return false; - - // Grab the graphics and present queues. - vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue); - if (surface) - vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue); - - m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 && - queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 && - m_device_properties.limits.timestampPeriod > 0); - Log_VerbosePrintf("GPU timing is %s (TS=%u TS valid bits=%u, TS period=%f)", - m_gpu_timing_supported ? 
"supported" : "not supported", - static_cast(m_device_properties.limits.timestampComputeAndGraphics), - queue_family_properties[m_graphics_queue_family_index].timestampValidBits, - m_device_properties.limits.timestampPeriod); - - ProcessDeviceExtensions(); - return true; -} - -void Vulkan::Context::ProcessDeviceExtensions() -{ - VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; - if (m_optional_extensions.vk_khr_driver_properties) - { - m_device_driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; - Util::AddPointerToChain(&properties2, &m_device_driver_properties); - } - - // query - vkGetPhysicalDeviceProperties2(m_physical_device, &properties2); -} - -bool Vulkan::Context::CreateAllocator() -{ - VmaAllocatorCreateInfo ci = {}; - ci.vulkanApiVersion = VK_API_VERSION_1_1; - ci.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; - ci.physicalDevice = m_physical_device; - ci.device = m_device; - ci.instance = m_instance; - - if (m_optional_extensions.vk_ext_memory_budget) - ci.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; - - VkResult res = vmaCreateAllocator(&ci, &m_allocator); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vmaCreateAllocator failed: "); - return false; - } - - return true; -} - -void Vulkan::Context::DestroyAllocator() -{ - if (m_allocator == VK_NULL_HANDLE) - return; - - vmaDestroyAllocator(m_allocator); - m_allocator = VK_NULL_HANDLE; -} - -bool Vulkan::Context::CreateCommandBuffers() -{ - VkResult res; - - uint32_t frame_index = 0; - for (FrameResources& resources : m_frame_resources) - { - resources.needs_fence_wait = false; - - VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, - m_graphics_queue_family_index}; - res = vkCreateCommandPool(m_device, &pool_info, nullptr, &resources.command_pool); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: "); - return false; - } - 
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.command_pool, "Frame Command Pool %u", - frame_index); - - VkCommandBufferAllocateInfo buffer_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr, - resources.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1}; - - res = vkAllocateCommandBuffers(m_device, &buffer_info, &resources.command_buffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: "); - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.command_buffer, "Frame Command Buffer %u", - frame_index); - - VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT}; - - res = vkCreateFence(m_device, &fence_info, nullptr, &resources.fence); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFence failed: "); - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.fence, "Frame Fence %u", frame_index); - // TODO: A better way to choose the number of descriptors. 
- VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}}; - - VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - nullptr, - 0, - 1024, // TODO: tweak this - static_cast(countof(pool_sizes)), - pool_sizes}; - - res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.descriptor_pool, "Frame Descriptor Pool %u", - frame_index); - - ++frame_index; - } - - ActivateCommandBuffer(0); - return true; -} - -void Vulkan::Context::DestroyCommandBuffers() -{ - for (FrameResources& resources : m_frame_resources) - { - for (auto& it : resources.cleanup_resources) - it(); - resources.cleanup_resources.clear(); - - if (resources.fence != VK_NULL_HANDLE) - { - vkDestroyFence(m_device, resources.fence, nullptr); - resources.fence = VK_NULL_HANDLE; - } - if (resources.descriptor_pool != VK_NULL_HANDLE) - { - vkDestroyDescriptorPool(m_device, resources.descriptor_pool, nullptr); - resources.descriptor_pool = VK_NULL_HANDLE; - } - if (resources.command_buffer != VK_NULL_HANDLE) - { - vkFreeCommandBuffers(m_device, resources.command_pool, 1, &resources.command_buffer); - resources.command_buffer = VK_NULL_HANDLE; - } - if (resources.command_pool != VK_NULL_HANDLE) - { - vkDestroyCommandPool(m_device, resources.command_pool, nullptr); - resources.command_pool = VK_NULL_HANDLE; - } - } -} - -bool Vulkan::Context::CreateGlobalDescriptorPool() -{ - // TODO: A better way to choose the number of descriptors. 
- VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}}; - - VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - nullptr, - VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - 1024, // TODO: tweak this - static_cast(countof(pool_sizes)), - pool_sizes}; - - const VkResult res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &m_global_descriptor_pool); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_global_descriptor_pool, "Global Descriptor Pool"); - return true; -} - -void Vulkan::Context::DestroyGlobalDescriptorPool() -{ - if (m_global_descriptor_pool == VK_NULL_HANDLE) - return; - - vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr); - m_global_descriptor_pool = VK_NULL_HANDLE; -} - -bool Vulkan::Context::CreateQueryPool() -{ - if (!m_gpu_timing_supported) - return true; - - const VkQueryPoolCreateInfo query_create_info = { - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 2, 0}; - const VkResult res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: "); - m_gpu_timing_supported = false; - return false; - } - - return true; -} - -void Vulkan::Context::DestroyQueryPool() -{ - if (!m_gpu_timing_supported) - return; - - vkDestroyQueryPool(m_device, m_timestamp_query_pool, nullptr); - m_timestamp_query_pool = VK_NULL_HANDLE; -} - -bool Vulkan::Context::CreateTextureStreamBuffer() -{ - if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE)) - { - Log_ErrorPrintf("Failed to allocate texture upload 
buffer"); - return false; - } - - return true; -} - -void Vulkan::Context::DestroyRenderPassCache() -{ - for (auto& it : m_render_pass_cache) - vkDestroyRenderPass(m_device, it.second, nullptr); - - m_render_pass_cache.clear(); -} - -VkDescriptorSet Vulkan::Context::AllocateDescriptorSet(VkDescriptorSetLayout set_layout) -{ - VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, - m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout}; - - VkDescriptorSet descriptor_set; - VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set); - if (res != VK_SUCCESS) - { - // Failing to allocate a descriptor set is not a fatal error, we can - // recover by moving to the next command buffer. - return VK_NULL_HANDLE; - } - - return descriptor_set; -} - -VkDescriptorSet Vulkan::Context::AllocateGlobalDescriptorSet(VkDescriptorSetLayout set_layout) -{ - VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, - m_global_descriptor_pool, 1, &set_layout}; - - VkDescriptorSet descriptor_set; - VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set); - if (res != VK_SUCCESS) - return VK_NULL_HANDLE; - - return descriptor_set; -} - -void Vulkan::Context::FreeGlobalDescriptorSet(VkDescriptorSet set) -{ - vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &set); -} - -void Vulkan::Context::WaitForFenceCounter(u64 fence_counter) -{ - if (m_completed_fence_counter >= fence_counter) - return; - - // Find the first command buffer which covers this counter value. 
- u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; - while (index != m_current_frame) - { - if (m_frame_resources[index].fence_counter >= fence_counter) - break; - - index = (index + 1) % NUM_COMMAND_BUFFERS; - } - - Assert(index != m_current_frame); - WaitForCommandBufferCompletion(index); -} - -void Vulkan::Context::WaitForGPUIdle() -{ - WaitForPresentComplete(); - vkDeviceWaitIdle(m_device); -} - -float Vulkan::Context::GetAndResetAccumulatedGPUTime() -{ - const float time = m_accumulated_gpu_time; - m_accumulated_gpu_time = 0.0f; - return time; -} - -bool Vulkan::Context::SetEnableGPUTiming(bool enabled) -{ - m_gpu_timing_enabled = enabled && m_gpu_timing_supported; - return (enabled == m_gpu_timing_enabled); -} - -void Vulkan::Context::WaitForCommandBufferCompletion(u32 index) -{ - // Wait for this command buffer to be completed. - VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); - - // Clean up any resources for command buffers between the last known completed buffer and this - // now-completed command buffer. If we use >2 buffers, this may be more than one buffer. 
- const u64 now_completed_counter = m_frame_resources[index].fence_counter; - u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; - while (cleanup_index != m_current_frame) - { - FrameResources& resources = m_frame_resources[cleanup_index]; - if (resources.fence_counter > now_completed_counter) - break; - - if (resources.fence_counter > m_completed_fence_counter) - { - for (auto& it : resources.cleanup_resources) - it(); - resources.cleanup_resources.clear(); - } - - cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS; - } - - m_completed_fence_counter = now_completed_counter; -} - -void Vulkan::Context::SubmitCommandBuffer(VkSemaphore wait_semaphore /* = VK_NULL_HANDLE */, - VkSemaphore signal_semaphore /* = VK_NULL_HANDLE */, - VkSwapchainKHR present_swap_chain /* = VK_NULL_HANDLE */, - uint32_t present_image_index /* = 0xFFFFFFFF */, - bool submit_on_thread /* = false */) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - - if (m_gpu_timing_enabled && resources.timestamp_written) - { - vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, - m_current_frame * 2 + 1); - } - - // End the current command buffer. - VkResult res = vkEndCommandBuffer(resources.command_buffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: "); - Panic("Failed to end command buffer"); - } - - // This command buffer now has commands, so can't be re-used without waiting. 
- resources.needs_fence_wait = true; - - std::unique_lock lock(m_present_mutex); - WaitForPresentComplete(lock); - - if (!submit_on_thread || !m_present_thread.joinable()) - { - DoSubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore); - if (present_swap_chain != VK_NULL_HANDLE) - DoPresent(signal_semaphore, present_swap_chain, present_image_index); - return; - } - - m_queued_present.command_buffer_index = m_current_frame; - m_queued_present.present_swap_chain = present_swap_chain; - m_queued_present.present_image_index = present_image_index; - m_queued_present.wait_semaphore = wait_semaphore; - m_queued_present.signal_semaphore = signal_semaphore; - m_present_done.store(false); - m_present_queued_cv.notify_one(); -} - -void Vulkan::Context::DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore) -{ - FrameResources& resources = m_frame_resources[index]; - - uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, &wait_bits, 1u, - &resources.command_buffer, 0, nullptr}; - - if (wait_semaphore != VK_NULL_HANDLE) - { - submit_info.pWaitSemaphores = &wait_semaphore; - submit_info.waitSemaphoreCount = 1; - } - - if (signal_semaphore != VK_NULL_HANDLE) - { - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &signal_semaphore; - } - const Vulkan::Util::DebugScope debugScope(m_graphics_queue, "Context::DoSubmitCommandBuffer: %u", index); - - VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: "); - Panic("Failed to submit command buffer."); - } -} - -void Vulkan::Context::DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, - uint32_t present_image_index) -{ - // Should have a signal semaphore. 
- Assert(wait_semaphore != VK_NULL_HANDLE); - VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - nullptr, - 1, - &wait_semaphore, - 1, - &present_swap_chain, - &present_image_index, - nullptr}; - const Vulkan::Util::DebugScope debugScope(m_present_queue, "Context::DoPresent: %u", present_image_index); - VkResult res = vkQueuePresentKHR(m_present_queue, &present_info); - if (res != VK_SUCCESS) - { - // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain. - if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) - LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); - - m_last_present_failed.store(true); - } -} - -void Vulkan::Context::WaitForPresentComplete() -{ - if (m_present_done.load()) - return; - - std::unique_lock lock(m_present_mutex); - WaitForPresentComplete(lock); -} - -void Vulkan::Context::WaitForPresentComplete(std::unique_lock& lock) -{ - if (m_present_done.load()) - return; - - m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); }); -} - -void Vulkan::Context::PresentThread() -{ - std::unique_lock lock(m_present_mutex); - while (!m_present_thread_done.load()) - { - m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); }); - - if (m_present_done.load()) - continue; - - DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.wait_semaphore, - m_queued_present.signal_semaphore); - DoPresent(m_queued_present.signal_semaphore, m_queued_present.present_swap_chain, - m_queued_present.present_image_index); - m_present_done.store(true); - m_present_done_cv.notify_one(); - } -} - -void Vulkan::Context::StartPresentThread() -{ - Assert(!m_present_thread.joinable()); - m_present_thread_done.store(false); - m_present_thread = std::thread(&Context::PresentThread, this); -} - -void Vulkan::Context::StopPresentThread() -{ - if (!m_present_thread.joinable()) - return; - - { - std::unique_lock lock(m_present_mutex); - 
WaitForPresentComplete(lock); - m_present_thread_done.store(true); - m_present_queued_cv.notify_one(); - } - - m_present_thread.join(); -} - -void Vulkan::Context::MoveToNextCommandBuffer() -{ - ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS); -} - -void Vulkan::Context::ActivateCommandBuffer(u32 index) -{ - FrameResources& resources = m_frame_resources[index]; - - if (!m_present_done.load() && m_queued_present.command_buffer_index == index) - WaitForPresentComplete(); - - // Wait for the GPU to finish with all resources for this command buffer. - if (resources.fence_counter > m_completed_fence_counter) - WaitForCommandBufferCompletion(index); - - // Reset fence to unsignaled before starting. - VkResult res = vkResetFences(m_device, 1, &resources.fence); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkResetFences failed: "); - - // Reset command pools to beginning since we can re-use the memory now - res = vkResetCommandPool(m_device, resources.command_pool, 0); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: "); - - // Enable commands to be recorded to the two buffers again. 
- VkCommandBufferBeginInfo begin_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr}; - res = vkBeginCommandBuffer(resources.command_buffer, &begin_info); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: "); - - // Also can do the same for the descriptor pools - res = vkResetDescriptorPool(m_device, resources.descriptor_pool, 0); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: "); - - if (m_gpu_timing_enabled) - { - if (resources.timestamp_written) - { - std::array timestamps; - res = - vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast(timestamps.size()), - sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT); - if (res == VK_SUCCESS) - { - // if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be - // zero - if (timestamps[0] > 0) - { - const double ns_diff = - (timestamps[1] - timestamps[0]) * static_cast(m_device_properties.limits.timestampPeriod); - m_accumulated_gpu_time = - static_cast(static_cast(m_accumulated_gpu_time) + (ns_diff / 1000000.0)); - } - } - else - { - LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: "); - } - } - - vkCmdResetQueryPool(resources.command_buffer, m_timestamp_query_pool, index * 2, 2); - vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, - index * 2); - } - - resources.fence_counter = m_next_fence_counter++; - resources.timestamp_written = m_gpu_timing_enabled; - - m_current_frame = index; - m_current_command_buffer = resources.command_buffer; - - // using the lower 32 bits of the fence index should be sufficient here, I hope... 
- vmaSetCurrentFrameIndex(m_allocator, static_cast(m_next_fence_counter)); -} - -void Vulkan::Context::ExecuteCommandBuffer(bool wait_for_completion) -{ - // If we're waiting for completion, don't bother waking the worker thread. - const u32 current_frame = m_current_frame; - SubmitCommandBuffer(); - MoveToNextCommandBuffer(); - - if (wait_for_completion) - WaitForCommandBufferCompletion(current_frame); -} - -bool Vulkan::Context::CheckLastPresentFail() -{ - bool res = m_last_present_failed; - m_last_present_failed = false; - return res; -} - -void Vulkan::Context::DeferBufferDestruction(VkBuffer object) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back([this, object]() { vkDestroyBuffer(m_device, object, nullptr); }); -} - -void Vulkan::Context::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back( - [this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); }); -} - -void Vulkan::Context::DeferBufferViewDestruction(VkBufferView object) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back([this, object]() { vkDestroyBufferView(m_device, object, nullptr); }); -} - -void Vulkan::Context::DeferDeviceMemoryDestruction(VkDeviceMemory object) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back([this, object]() { vkFreeMemory(m_device, object, nullptr); }); -} - -void Vulkan::Context::DeferFramebufferDestruction(VkFramebuffer object) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back([this, object]() { vkDestroyFramebuffer(m_device, object, nullptr); }); -} - -void Vulkan::Context::DeferImageDestruction(VkImage object) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - 
resources.cleanup_resources.push_back([this, object]() { vkDestroyImage(m_device, object, nullptr); }); -} - -void Vulkan::Context::DeferImageDestruction(VkImage object, VmaAllocation allocation) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back( - [this, object, allocation]() { vmaDestroyImage(m_allocator, object, allocation); }); -} - -void Vulkan::Context::DeferImageViewDestruction(VkImageView object) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back([this, object]() { vkDestroyImageView(m_device, object, nullptr); }); -} - -void Vulkan::Context::DeferPipelineDestruction(VkPipeline pipeline) -{ - FrameResources& resources = m_frame_resources[m_current_frame]; - resources.cleanup_resources.push_back([this, pipeline]() { vkDestroyPipeline(m_device, pipeline, nullptr); }); -} - -static VKAPI_ATTR VkBool32 VKAPI_CALL DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, - VkDebugUtilsMessageTypeFlagsEXT messageType, - const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, - void* pUserData) -{ - LOGLEVEL level; - if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) - level = LOGLEVEL_ERROR; - else if (severity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) - level = LOGLEVEL_WARNING; - else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) - level = LOGLEVEL_INFO; - else - level = LOGLEVEL_DEBUG; - - Log::Writef("Vulkan", __func__, level, "Vulkan debug report: (%s) %s", - pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage); - return VK_FALSE; -} - -bool Vulkan::Context::EnableDebugUtils() -{ - // Already enabled? 
- if (m_debug_messenger_callback != VK_NULL_HANDLE) - return true; - - // Check for presence of the functions before calling - if (!vkCreateDebugUtilsMessengerEXT || !vkDestroyDebugUtilsMessengerEXT || !vkSubmitDebugUtilsMessageEXT) - { - return false; - } - - VkDebugUtilsMessengerCreateInfoEXT messenger_info = { - VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, - nullptr, - 0, - VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT, - VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, - DebugMessengerCallback, - nullptr}; - - VkResult res = vkCreateDebugUtilsMessengerEXT(m_instance, &messenger_info, nullptr, &m_debug_messenger_callback); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDebugUtilsMessengerEXT failed: "); - return false; - } - - return true; -} - -void Vulkan::Context::DisableDebugUtils() -{ - if (m_debug_messenger_callback != VK_NULL_HANDLE) - { - vkDestroyDebugUtilsMessengerEXT(m_instance, m_debug_messenger_callback, nullptr); - m_debug_messenger_callback = VK_NULL_HANDLE; - } -} - -VkRenderPass Vulkan::Context::GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples, - VkAttachmentLoadOp load_op) -{ - auto key = std::tie(color_format, depth_format, samples, load_op); - auto it = m_render_pass_cache.find(key); - if (it != m_render_pass_cache.end()) - return it->second; - - VkAttachmentReference color_reference; - VkAttachmentReference* color_reference_ptr = nullptr; - VkAttachmentReference depth_reference; - VkAttachmentReference* depth_reference_ptr = nullptr; - std::array attachments; - u32 num_attachments = 0; - if (color_format != VK_FORMAT_UNDEFINED) - { - attachments[num_attachments] = {0, - color_format, - samples, - load_op, - VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_LOAD_OP_DONT_CARE, - 
VK_ATTACHMENT_STORE_OP_DONT_CARE, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL}; - color_reference.attachment = num_attachments; - color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - color_reference_ptr = &color_reference; - num_attachments++; - } - if (depth_format != VK_FORMAT_UNDEFINED) - { - attachments[num_attachments] = {0, - depth_format, - samples, - load_op, - VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_STORE_OP_DONT_CARE, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL}; - depth_reference.attachment = num_attachments; - depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - depth_reference_ptr = &depth_reference; - num_attachments++; - } - - VkSubpassDescription subpass = {0, - VK_PIPELINE_BIND_POINT_GRAPHICS, - 0, - nullptr, - color_reference_ptr ? 1u : 0u, - color_reference_ptr ? color_reference_ptr : nullptr, - nullptr, - depth_reference_ptr, - 0, - nullptr}; - VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - nullptr, - 0, - num_attachments, - attachments.data(), - 1, - &subpass, - 0, - nullptr}; - - VkRenderPass pass; - VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: "); - return VK_NULL_HANDLE; - } - - m_render_pass_cache.emplace(key, pass); - return pass; -} diff --git a/src/common/vulkan/context.h b/src/common/vulkan/context.h deleted file mode 100644 index bc9ebb872..000000000 --- a/src/common/vulkan/context.h +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#include "../types.h" -#include "loader.h" -#include "stream_buffer.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - 
-struct WindowInfo; - -namespace Vulkan { - -class SwapChain; - -class Context -{ -public: - enum : u32 - { - NUM_COMMAND_BUFFERS = 3 - }; - - struct OptionalExtensions - { - bool vk_ext_memory_budget : 1; - bool vk_khr_driver_properties : 1; - }; - - ~Context(); - - // Determines if the Vulkan validation layer is available on the system. - static bool CheckValidationLayerAvailablility(); - - // Helper method to create a Vulkan instance. - static VkInstance CreateVulkanInstance(const WindowInfo* wi, bool enable_debug_utils, bool enable_validation_layer); - - // Returns a list of Vulkan-compatible GPUs. - using GPUList = std::vector; - using GPUNameList = std::vector; - static GPUList EnumerateGPUs(VkInstance instance); - static GPUNameList EnumerateGPUNames(VkInstance instance); - - // Creates a new context and sets it up as global. - static bool Create(std::string_view gpu_name, const WindowInfo* wi, std::unique_ptr* out_swap_chain, - bool threaded_presentation, bool enable_debug_utils, bool enable_validation_layer, bool vsync); - - // Destroys context. - static void Destroy(); - - // Enable/disable debug message runtime. 
- bool EnableDebugUtils(); - void DisableDebugUtils(); - - // Global state accessors - ALWAYS_INLINE VkInstance GetVulkanInstance() const { return m_instance; } - ALWAYS_INLINE VkPhysicalDevice GetPhysicalDevice() const { return m_physical_device; } - ALWAYS_INLINE VkDevice GetDevice() const { return m_device; } - ALWAYS_INLINE VmaAllocator GetAllocator() const { return m_allocator; } - ALWAYS_INLINE VkQueue GetGraphicsQueue() const { return m_graphics_queue; } - ALWAYS_INLINE u32 GetGraphicsQueueFamilyIndex() const { return m_graphics_queue_family_index; } - ALWAYS_INLINE VkQueue GetPresentQueue() const { return m_present_queue; } - ALWAYS_INLINE u32 GetPresentQueueFamilyIndex() const { return m_present_queue_family_index; } - ALWAYS_INLINE const VkQueueFamilyProperties& GetGraphicsQueueProperties() const - { - return m_graphics_queue_properties; - } - ALWAYS_INLINE const VkPhysicalDeviceMemoryProperties& GetDeviceMemoryProperties() const - { - return m_device_memory_properties; - } - ALWAYS_INLINE const VkPhysicalDeviceProperties& GetDeviceProperties() const { return m_device_properties; } - ALWAYS_INLINE const VkPhysicalDeviceFeatures& GetDeviceFeatures() const { return m_device_features; } - ALWAYS_INLINE const VkPhysicalDeviceLimits& GetDeviceLimits() const { return m_device_properties.limits; } - ALWAYS_INLINE const VkPhysicalDeviceDriverProperties& GetDeviceDriverProperties() const - { - return m_device_driver_properties; - } - - // Support bits - ALWAYS_INLINE bool SupportsGeometryShaders() const { return m_device_features.geometryShader == VK_TRUE; } - ALWAYS_INLINE bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; } - - // Helpers for getting constants - ALWAYS_INLINE u32 GetUniformBufferAlignment() const - { - return static_cast(m_device_properties.limits.minUniformBufferOffsetAlignment); - } - ALWAYS_INLINE u32 GetTexelBufferAlignment() const - { - return 
static_cast(m_device_properties.limits.minTexelBufferOffsetAlignment); - } - ALWAYS_INLINE u32 GetStorageBufferAlignment() const - { - return static_cast(m_device_properties.limits.minStorageBufferOffsetAlignment); - } - ALWAYS_INLINE u32 GetBufferImageGranularity() const - { - return static_cast(m_device_properties.limits.bufferImageGranularity); - } - ALWAYS_INLINE u32 GetBufferCopyOffsetAlignment() const - { - return static_cast(m_device_properties.limits.optimalBufferCopyOffsetAlignment); - } - ALWAYS_INLINE u32 GetBufferCopyRowPitchAlignment() const - { - return static_cast(m_device_properties.limits.optimalBufferCopyRowPitchAlignment); - } - ALWAYS_INLINE u32 GetMaxImageDimension2D() const { return m_device_properties.limits.maxImageDimension2D; } - - // Creates a simple render pass. - VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples, - VkAttachmentLoadOp load_op); - - // These command buffers are allocated per-frame. They are valid until the command buffer - // is submitted, after that you should call these functions again. - ALWAYS_INLINE VkDescriptorPool GetGlobalDescriptorPool() const { return m_global_descriptor_pool; } - ALWAYS_INLINE VkCommandBuffer GetCurrentCommandBuffer() const { return m_current_command_buffer; } - ALWAYS_INLINE StreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; } - ALWAYS_INLINE VkDescriptorPool GetCurrentDescriptorPool() const - { - return m_frame_resources[m_current_frame].descriptor_pool; - } - - /// Allocates a descriptor set from the pool reserved for the current frame. - VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout); - - /// Allocates a descriptor set from the pool reserved for the current frame. - VkDescriptorSet AllocateGlobalDescriptorSet(VkDescriptorSetLayout set_layout); - - /// Frees a descriptor set allocated from the global pool. 
- void FreeGlobalDescriptorSet(VkDescriptorSet set); - - // Gets the fence that will be signaled when the currently executing command buffer is - // queued and executed. Do not wait for this fence before the buffer is executed. - ALWAYS_INLINE VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; } - - // Fence "counters" are used to track which commands have been completed by the GPU. - // If the last completed fence counter is greater or equal to N, it means that the work - // associated counter N has been completed by the GPU. The value of N to associate with - // commands can be retreived by calling GetCurrentFenceCounter(). - u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; } - - // Gets the fence that will be signaled when the currently executing command buffer is - // queued and executed. Do not wait for this fence before the buffer is executed. - u64 GetCurrentFenceCounter() const { return m_frame_resources[m_current_frame].fence_counter; } - - void SubmitCommandBuffer(VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkSemaphore signal_semaphore = VK_NULL_HANDLE, - VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, - uint32_t present_image_index = 0xFFFFFFFF, bool submit_on_thread = false); - void MoveToNextCommandBuffer(); - - void ExecuteCommandBuffer(bool wait_for_completion); - void WaitForPresentComplete(); - - // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. - bool CheckLastPresentFail(); - - // Schedule a vulkan resource for destruction later on. This will occur when the command buffer - // is next re-used, and the GPU has finished working with the specified resource. 
- void DeferBufferDestruction(VkBuffer object); - void DeferBufferDestruction(VkBuffer object, VmaAllocation allocation); - void DeferBufferViewDestruction(VkBufferView object); - void DeferDeviceMemoryDestruction(VkDeviceMemory object); - void DeferFramebufferDestruction(VkFramebuffer object); - void DeferImageDestruction(VkImage object); - void DeferImageDestruction(VkImage object, VmaAllocation allocation); - void DeferImageViewDestruction(VkImageView object); - void DeferPipelineDestruction(VkPipeline pipeline); - - // Wait for a fence to be completed. - // Also invokes callbacks for completion. - void WaitForFenceCounter(u64 fence_counter); - - void WaitForGPUIdle(); - - float GetAndResetAccumulatedGPUTime(); - bool SetEnableGPUTiming(bool enabled); - -private: - Context(VkInstance instance, VkPhysicalDevice physical_device, bool owns_device); - - using ExtensionList = std::vector; - static bool SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo* wi, bool enable_debug_utils); - bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface); - bool SelectDeviceFeatures(const VkPhysicalDeviceFeatures* required_features); - bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, const char** required_device_extensions, - u32 num_required_device_extensions, const char** required_device_layers, - u32 num_required_device_layers, const VkPhysicalDeviceFeatures* required_features); - void ProcessDeviceExtensions(); - - bool CreateAllocator(); - void DestroyAllocator(); - bool CreateCommandBuffers(); - void DestroyCommandBuffers(); - bool CreateGlobalDescriptorPool(); - void DestroyGlobalDescriptorPool(); - bool CreateQueryPool(); - void DestroyQueryPool(); - bool CreateTextureStreamBuffer(); - void DestroyRenderPassCache(); - - void ActivateCommandBuffer(u32 index); - void WaitForCommandBufferCompletion(u32 index); - - void DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore); - 
void DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index); - void WaitForPresentComplete(std::unique_lock& lock); - void PresentThread(); - void StartPresentThread(); - void StopPresentThread(); - - struct FrameResources - { - // [0] - Init (upload) command buffer, [1] - draw command buffer - VkCommandPool command_pool = VK_NULL_HANDLE; - VkCommandBuffer command_buffer = VK_NULL_HANDLE; - VkDescriptorPool descriptor_pool = VK_NULL_HANDLE; - VkFence fence = VK_NULL_HANDLE; - u64 fence_counter = 0; - bool needs_fence_wait = false; - bool timestamp_written = false; - - std::vector> cleanup_resources; - }; - - VkInstance m_instance = VK_NULL_HANDLE; - VkPhysicalDevice m_physical_device = VK_NULL_HANDLE; - VkDevice m_device = VK_NULL_HANDLE; - VmaAllocator m_allocator = VK_NULL_HANDLE; - - VkCommandBuffer m_current_command_buffer = VK_NULL_HANDLE; - - VkDescriptorPool m_global_descriptor_pool = VK_NULL_HANDLE; - - VkQueue m_graphics_queue = VK_NULL_HANDLE; - u32 m_graphics_queue_family_index = 0; - VkQueue m_present_queue = VK_NULL_HANDLE; - u32 m_present_queue_family_index = 0; - - VkQueryPool m_timestamp_query_pool = VK_NULL_HANDLE; - float m_accumulated_gpu_time = 0.0f; - bool m_gpu_timing_enabled = false; - bool m_gpu_timing_supported = false; - - std::array m_frame_resources; - u64 m_next_fence_counter = 1; - u64 m_completed_fence_counter = 0; - u32 m_current_frame; - - StreamBuffer m_texture_upload_buffer; - - std::atomic_bool m_last_present_failed{false}; - std::atomic_bool m_present_done{true}; - std::mutex m_present_mutex; - std::condition_variable m_present_queued_cv; - std::condition_variable m_present_done_cv; - std::thread m_present_thread; - std::atomic_bool m_present_thread_done{false}; - - struct QueuedPresent - { - VkSemaphore wait_semaphore; - VkSemaphore signal_semaphore; - VkSwapchainKHR present_swap_chain; - u32 command_buffer_index; - u32 present_image_index; - }; - - QueuedPresent m_queued_present 
= {}; - - // Render pass cache - using RenderPassCacheKey = std::tuple; - std::map m_render_pass_cache; - - VkDebugUtilsMessengerEXT m_debug_messenger_callback = VK_NULL_HANDLE; - - VkQueueFamilyProperties m_graphics_queue_properties = {}; - VkPhysicalDeviceFeatures m_device_features = {}; - VkPhysicalDeviceProperties m_device_properties = {}; - VkPhysicalDeviceMemoryProperties m_device_memory_properties = {}; - VkPhysicalDeviceDriverPropertiesKHR m_device_driver_properties = {}; - OptionalExtensions m_optional_extensions = {}; -}; - -} // namespace Vulkan - -extern std::unique_ptr g_vulkan_context; diff --git a/src/common/vulkan/entry_points.h b/src/common/vulkan/entry_points.h deleted file mode 100644 index 7c2689046..000000000 --- a/src/common/vulkan/entry_points.h +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -// We abuse the preprocessor here to only need to specify function names once. -// Function names are prefixed so to not conflict with system symbols at runtime. 
-#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name ds_##name; -#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name ds_##name; -#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name ds_##name; -#define VULKAN_DEFINE_NAME_PREFIX ds_ -#include "entry_points.inl" -#undef VULKAN_DEFINE_NAME_PREFIX -#undef VULKAN_DEVICE_ENTRY_POINT -#undef VULKAN_INSTANCE_ENTRY_POINT -#undef VULKAN_MODULE_ENTRY_POINT - -#ifdef __cplusplus -} -#endif - -#define vkCreateInstance ds_vkCreateInstance -#define vkGetInstanceProcAddr ds_vkGetInstanceProcAddr -#define vkEnumerateInstanceExtensionProperties ds_vkEnumerateInstanceExtensionProperties -#define vkEnumerateInstanceLayerProperties ds_vkEnumerateInstanceLayerProperties -#define vkEnumerateInstanceVersion ds_vkEnumerateInstanceVersion - -#define vkGetDeviceProcAddr ds_vkGetDeviceProcAddr -#define vkDestroyInstance ds_vkDestroyInstance -#define vkEnumeratePhysicalDevices ds_vkEnumeratePhysicalDevices -#define vkGetPhysicalDeviceFeatures ds_vkGetPhysicalDeviceFeatures -#define vkGetPhysicalDeviceFormatProperties ds_vkGetPhysicalDeviceFormatProperties -#define vkGetPhysicalDeviceImageFormatProperties ds_vkGetPhysicalDeviceImageFormatProperties -#define vkGetPhysicalDeviceProperties ds_vkGetPhysicalDeviceProperties -#define vkGetPhysicalDeviceQueueFamilyProperties ds_vkGetPhysicalDeviceQueueFamilyProperties -#define vkGetPhysicalDeviceMemoryProperties ds_vkGetPhysicalDeviceMemoryProperties -#define vkCreateDevice ds_vkCreateDevice -#define vkEnumerateDeviceExtensionProperties ds_vkEnumerateDeviceExtensionProperties -#define vkEnumerateDeviceLayerProperties ds_vkEnumerateDeviceLayerProperties -#define vkGetPhysicalDeviceSparseImageFormatProperties ds_vkGetPhysicalDeviceSparseImageFormatProperties -#define vkDestroySurfaceKHR ds_vkDestroySurfaceKHR -#define vkGetPhysicalDeviceSurfaceSupportKHR ds_vkGetPhysicalDeviceSurfaceSupportKHR -#define vkGetPhysicalDeviceSurfaceCapabilitiesKHR 
ds_vkGetPhysicalDeviceSurfaceCapabilitiesKHR -#define vkGetPhysicalDeviceSurfaceFormatsKHR ds_vkGetPhysicalDeviceSurfaceFormatsKHR -#define vkGetPhysicalDeviceSurfacePresentModesKHR ds_vkGetPhysicalDeviceSurfacePresentModesKHR -#define vkCreateWin32SurfaceKHR ds_vkCreateWin32SurfaceKHR -#define vkGetPhysicalDeviceWin32PresentationSupportKHR ds_vkGetPhysicalDeviceWin32PresentationSupportKHR -#define vkCreateXlibSurfaceKHR ds_vkCreateXlibSurfaceKHR -#define vkGetPhysicalDeviceXlibPresentationSupportKHR ds_vkGetPhysicalDeviceXlibPresentationSupportKHR -#define vkCreateWaylandSurfaceKHR ds_vkCreateWaylandSurfaceKHR -#define vkCreateAndroidSurfaceKHR ds_vkCreateAndroidSurfaceKHR -#define vkCreateMacOSSurfaceMVK ds_vkCreateMacOSSurfaceMVK -#define vkCreateMetalSurfaceEXT ds_vkCreateMetalSurfaceEXT - -// VK_EXT_debug_utils -#define vkCmdBeginDebugUtilsLabelEXT ds_vkCmdBeginDebugUtilsLabelEXT -#define vkCmdEndDebugUtilsLabelEXT ds_vkCmdEndDebugUtilsLabelEXT -#define vkCmdInsertDebugUtilsLabelEXT ds_vkCmdInsertDebugUtilsLabelEXT -#define vkCreateDebugUtilsMessengerEXT ds_vkCreateDebugUtilsMessengerEXT -#define vkDestroyDebugUtilsMessengerEXT ds_vkDestroyDebugUtilsMessengerEXT -#define vkQueueBeginDebugUtilsLabelEXT ds_vkQueueBeginDebugUtilsLabelEXT -#define vkQueueEndDebugUtilsLabelEXT ds_vkQueueEndDebugUtilsLabelEXT -#define vkQueueInsertDebugUtilsLabelEXT ds_vkQueueInsertDebugUtilsLabelEXT -#define vkSetDebugUtilsObjectNameEXT ds_vkSetDebugUtilsObjectNameEXT -#define vkSetDebugUtilsObjectTagEXT ds_vkSetDebugUtilsObjectTagEXT -#define vkSubmitDebugUtilsMessageEXT ds_vkSubmitDebugUtilsMessageEXT - -#define vkGetPhysicalDeviceSurfaceCapabilities2KHR ds_vkGetPhysicalDeviceSurfaceCapabilities2KHR -#define vkGetPhysicalDeviceDisplayPropertiesKHR ds_vkGetPhysicalDeviceDisplayPropertiesKHR -#define vkGetPhysicalDeviceDisplayPlanePropertiesKHR ds_vkGetPhysicalDeviceDisplayPlanePropertiesKHR -#define vkGetDisplayPlaneSupportedDisplaysKHR ds_vkGetDisplayPlaneSupportedDisplaysKHR 
-#define vkGetDisplayModePropertiesKHR ds_vkGetDisplayModePropertiesKHR -#define vkCreateDisplayModeKHR ds_vkCreateDisplayModeKHR -#define vkGetDisplayPlaneCapabilitiesKHR ds_vkGetDisplayPlaneCapabilitiesKHR -#define vkCreateDisplayPlaneSurfaceKHR ds_vkCreateDisplayPlaneSurfaceKHR - -// Vulkan 1.1 functions. -#define vkGetPhysicalDeviceFeatures2 ds_vkGetPhysicalDeviceFeatures2 -#define vkGetPhysicalDeviceProperties2 ds_vkGetPhysicalDeviceProperties2 -#define vkGetPhysicalDeviceMemoryProperties2 ds_vkGetPhysicalDeviceMemoryProperties2 - -#define vkDestroyDevice ds_vkDestroyDevice -#define vkGetDeviceQueue ds_vkGetDeviceQueue -#define vkQueueSubmit ds_vkQueueSubmit -#define vkQueueWaitIdle ds_vkQueueWaitIdle -#define vkDeviceWaitIdle ds_vkDeviceWaitIdle -#define vkAllocateMemory ds_vkAllocateMemory -#define vkFreeMemory ds_vkFreeMemory -#define vkMapMemory ds_vkMapMemory -#define vkUnmapMemory ds_vkUnmapMemory -#define vkFlushMappedMemoryRanges ds_vkFlushMappedMemoryRanges -#define vkInvalidateMappedMemoryRanges ds_vkInvalidateMappedMemoryRanges -#define vkGetDeviceMemoryCommitment ds_vkGetDeviceMemoryCommitment -#define vkBindBufferMemory ds_vkBindBufferMemory -#define vkBindImageMemory ds_vkBindImageMemory -#define vkGetBufferMemoryRequirements ds_vkGetBufferMemoryRequirements -#define vkGetImageMemoryRequirements ds_vkGetImageMemoryRequirements -#define vkGetImageSparseMemoryRequirements ds_vkGetImageSparseMemoryRequirements -#define vkQueueBindSparse ds_vkQueueBindSparse -#define vkCreateFence ds_vkCreateFence -#define vkDestroyFence ds_vkDestroyFence -#define vkResetFences ds_vkResetFences -#define vkGetFenceStatus ds_vkGetFenceStatus -#define vkWaitForFences ds_vkWaitForFences -#define vkCreateSemaphore ds_vkCreateSemaphore -#define vkDestroySemaphore ds_vkDestroySemaphore -#define vkCreateEvent ds_vkCreateEvent -#define vkDestroyEvent ds_vkDestroyEvent -#define vkGetEventStatus ds_vkGetEventStatus -#define vkSetEvent ds_vkSetEvent -#define vkResetEvent 
ds_vkResetEvent -#define vkCreateQueryPool ds_vkCreateQueryPool -#define vkDestroyQueryPool ds_vkDestroyQueryPool -#define vkGetQueryPoolResults ds_vkGetQueryPoolResults -#define vkCreateBuffer ds_vkCreateBuffer -#define vkDestroyBuffer ds_vkDestroyBuffer -#define vkCreateBufferView ds_vkCreateBufferView -#define vkDestroyBufferView ds_vkDestroyBufferView -#define vkCreateImage ds_vkCreateImage -#define vkDestroyImage ds_vkDestroyImage -#define vkGetImageSubresourceLayout ds_vkGetImageSubresourceLayout -#define vkCreateImageView ds_vkCreateImageView -#define vkDestroyImageView ds_vkDestroyImageView -#define vkCreateShaderModule ds_vkCreateShaderModule -#define vkDestroyShaderModule ds_vkDestroyShaderModule -#define vkCreatePipelineCache ds_vkCreatePipelineCache -#define vkDestroyPipelineCache ds_vkDestroyPipelineCache -#define vkGetPipelineCacheData ds_vkGetPipelineCacheData -#define vkMergePipelineCaches ds_vkMergePipelineCaches -#define vkCreateGraphicsPipelines ds_vkCreateGraphicsPipelines -#define vkCreateComputePipelines ds_vkCreateComputePipelines -#define vkDestroyPipeline ds_vkDestroyPipeline -#define vkCreatePipelineLayout ds_vkCreatePipelineLayout -#define vkDestroyPipelineLayout ds_vkDestroyPipelineLayout -#define vkCreateSampler ds_vkCreateSampler -#define vkDestroySampler ds_vkDestroySampler -#define vkCreateDescriptorSetLayout ds_vkCreateDescriptorSetLayout -#define vkDestroyDescriptorSetLayout ds_vkDestroyDescriptorSetLayout -#define vkCreateDescriptorPool ds_vkCreateDescriptorPool -#define vkDestroyDescriptorPool ds_vkDestroyDescriptorPool -#define vkResetDescriptorPool ds_vkResetDescriptorPool -#define vkAllocateDescriptorSets ds_vkAllocateDescriptorSets -#define vkFreeDescriptorSets ds_vkFreeDescriptorSets -#define vkUpdateDescriptorSets ds_vkUpdateDescriptorSets -#define vkCreateFramebuffer ds_vkCreateFramebuffer -#define vkDestroyFramebuffer ds_vkDestroyFramebuffer -#define vkCreateRenderPass ds_vkCreateRenderPass -#define vkDestroyRenderPass 
ds_vkDestroyRenderPass -#define vkGetRenderAreaGranularity ds_vkGetRenderAreaGranularity -#define vkCreateCommandPool ds_vkCreateCommandPool -#define vkDestroyCommandPool ds_vkDestroyCommandPool -#define vkResetCommandPool ds_vkResetCommandPool -#define vkAllocateCommandBuffers ds_vkAllocateCommandBuffers -#define vkFreeCommandBuffers ds_vkFreeCommandBuffers -#define vkBeginCommandBuffer ds_vkBeginCommandBuffer -#define vkEndCommandBuffer ds_vkEndCommandBuffer -#define vkResetCommandBuffer ds_vkResetCommandBuffer -#define vkCmdBindPipeline ds_vkCmdBindPipeline -#define vkCmdSetViewport ds_vkCmdSetViewport -#define vkCmdSetScissor ds_vkCmdSetScissor -#define vkCmdSetLineWidth ds_vkCmdSetLineWidth -#define vkCmdSetDepthBias ds_vkCmdSetDepthBias -#define vkCmdSetBlendConstants ds_vkCmdSetBlendConstants -#define vkCmdSetDepthBounds ds_vkCmdSetDepthBounds -#define vkCmdSetStencilCompareMask ds_vkCmdSetStencilCompareMask -#define vkCmdSetStencilWriteMask ds_vkCmdSetStencilWriteMask -#define vkCmdSetStencilReference ds_vkCmdSetStencilReference -#define vkCmdBindDescriptorSets ds_vkCmdBindDescriptorSets -#define vkCmdBindIndexBuffer ds_vkCmdBindIndexBuffer -#define vkCmdBindVertexBuffers ds_vkCmdBindVertexBuffers -#define vkCmdDraw ds_vkCmdDraw -#define vkCmdDrawIndexed ds_vkCmdDrawIndexed -#define vkCmdDrawIndirect ds_vkCmdDrawIndirect -#define vkCmdDrawIndexedIndirect ds_vkCmdDrawIndexedIndirect -#define vkCmdDispatch ds_vkCmdDispatch -#define vkCmdDispatchIndirect ds_vkCmdDispatchIndirect -#define vkCmdCopyBuffer ds_vkCmdCopyBuffer -#define vkCmdCopyImage ds_vkCmdCopyImage -#define vkCmdBlitImage ds_vkCmdBlitImage -#define vkCmdCopyBufferToImage ds_vkCmdCopyBufferToImage -#define vkCmdCopyImageToBuffer ds_vkCmdCopyImageToBuffer -#define vkCmdUpdateBuffer ds_vkCmdUpdateBuffer -#define vkCmdFillBuffer ds_vkCmdFillBuffer -#define vkCmdClearColorImage ds_vkCmdClearColorImage -#define vkCmdClearDepthStencilImage ds_vkCmdClearDepthStencilImage -#define vkCmdClearAttachments 
ds_vkCmdClearAttachments -#define vkCmdResolveImage ds_vkCmdResolveImage -#define vkCmdSetEvent ds_vkCmdSetEvent -#define vkCmdResetEvent ds_vkCmdResetEvent -#define vkCmdWaitEvents ds_vkCmdWaitEvents -#define vkCmdPipelineBarrier ds_vkCmdPipelineBarrier -#define vkCmdBeginQuery ds_vkCmdBeginQuery -#define vkCmdEndQuery ds_vkCmdEndQuery -#define vkCmdResetQueryPool ds_vkCmdResetQueryPool -#define vkCmdWriteTimestamp ds_vkCmdWriteTimestamp -#define vkCmdCopyQueryPoolResults ds_vkCmdCopyQueryPoolResults -#define vkCmdPushConstants ds_vkCmdPushConstants -#define vkCmdBeginRenderPass ds_vkCmdBeginRenderPass -#define vkCmdNextSubpass ds_vkCmdNextSubpass -#define vkCmdEndRenderPass ds_vkCmdEndRenderPass -#define vkCmdExecuteCommands ds_vkCmdExecuteCommands -#define vkCreateSwapchainKHR ds_vkCreateSwapchainKHR -#define vkDestroySwapchainKHR ds_vkDestroySwapchainKHR -#define vkGetSwapchainImagesKHR ds_vkGetSwapchainImagesKHR -#define vkAcquireNextImageKHR ds_vkAcquireNextImageKHR -#define vkQueuePresentKHR ds_vkQueuePresentKHR - -// Vulkan 1.1 functions. -#define vkGetBufferMemoryRequirements2 ds_vkGetBufferMemoryRequirements2 -#define vkGetImageMemoryRequirements2 ds_vkGetImageMemoryRequirements2 -#define vkBindBufferMemory2 ds_vkBindBufferMemory2 -#define vkBindImageMemory2 ds_vkBindImageMemory2 - -#ifdef SUPPORTS_VULKAN_EXCLUSIVE_FULLSCREEN -#define vkAcquireFullScreenExclusiveModeEXT ds_vkAcquireFullScreenExclusiveModeEXT -#define vkReleaseFullScreenExclusiveModeEXT ds_vkReleaseFullScreenExclusiveModeEXT -#endif - -// Vulkan 1.3 functions. 
-#define vkGetDeviceBufferMemoryRequirements ds_vkGetDeviceBufferMemoryRequirements -#define vkGetDeviceImageMemoryRequirements ds_vkGetDeviceImageMemoryRequirements diff --git a/src/common/vulkan/shader_cache.cpp b/src/common/vulkan/shader_cache.cpp deleted file mode 100644 index 2ba6d8030..000000000 --- a/src/common/vulkan/shader_cache.cpp +++ /dev/null @@ -1,525 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "shader_cache.h" -#include "../assert.h" -#include "../file_system.h" -#include "../log.h" -#include "../md5_digest.h" -#include "context.h" -#include "shader_compiler.h" -#include "util.h" -Log_SetChannel(Vulkan::ShaderCache); - -// TODO: store the driver version and stuff in the shader header - -std::unique_ptr g_vulkan_shader_cache; - -namespace Vulkan { - -using ShaderCompiler::SPIRVCodeType; -using ShaderCompiler::SPIRVCodeVector; - -#pragma pack(push, 4) -struct VK_PIPELINE_CACHE_HEADER -{ - u32 header_length; - u32 header_version; - u32 vendor_id; - u32 device_id; - u8 uuid[VK_UUID_SIZE]; -}; - -struct CacheIndexEntry -{ - u64 source_hash_low; - u64 source_hash_high; - u32 source_length; - u32 shader_type; - u32 file_offset; - u32 blob_size; -}; -#pragma pack(pop) - -static bool ValidatePipelineCacheHeader(const VK_PIPELINE_CACHE_HEADER& header) -{ - if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) - { - Log_ErrorPrintf("Pipeline cache failed validation: Invalid header length"); - return false; - } - - if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) - { - Log_ErrorPrintf("Pipeline cache failed validation: Invalid header version"); - return false; - } - - if (header.vendor_id != g_vulkan_context->GetDeviceProperties().vendorID) - { - Log_ErrorPrintf("Pipeline cache failed validation: Incorrect vendor ID (file: 0x%X, device: 0x%X)", - header.vendor_id, g_vulkan_context->GetDeviceProperties().vendorID); - return false; - } - - if 
(header.device_id != g_vulkan_context->GetDeviceProperties().deviceID) - { - Log_ErrorPrintf("Pipeline cache failed validation: Incorrect device ID (file: 0x%X, device: 0x%X)", - header.device_id, g_vulkan_context->GetDeviceProperties().deviceID); - return false; - } - - if (std::memcmp(header.uuid, g_vulkan_context->GetDeviceProperties().pipelineCacheUUID, VK_UUID_SIZE) != 0) - { - Log_ErrorPrintf("Pipeline cache failed validation: Incorrect UUID"); - return false; - } - - return true; -} - -static void FillPipelineCacheHeader(VK_PIPELINE_CACHE_HEADER* header) -{ - header->header_length = sizeof(VK_PIPELINE_CACHE_HEADER); - header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; - header->vendor_id = g_vulkan_context->GetDeviceProperties().vendorID; - header->device_id = g_vulkan_context->GetDeviceProperties().deviceID; - std::memcpy(header->uuid, g_vulkan_context->GetDeviceProperties().pipelineCacheUUID, VK_UUID_SIZE); -} - -ShaderCache::ShaderCache() = default; - -ShaderCache::~ShaderCache() -{ - CloseShaderCache(); - FlushPipelineCache(); - ClosePipelineCache(); -} - -bool ShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const -{ - return (source_hash_low == key.source_hash_low && source_hash_high == key.source_hash_high && - source_length == key.source_length && shader_type == key.shader_type); -} - -bool ShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const -{ - return (source_hash_low != key.source_hash_low || source_hash_high != key.source_hash_high || - source_length != key.source_length || shader_type != key.shader_type); -} - -void ShaderCache::Create(std::string_view base_path, u32 version, bool debug) -{ - Assert(!g_vulkan_shader_cache); - g_vulkan_shader_cache.reset(new ShaderCache()); - g_vulkan_shader_cache->Open(base_path, version, debug); -} - -void ShaderCache::Destroy() -{ - g_vulkan_shader_cache.reset(); -} - -void ShaderCache::Open(std::string_view base_path, u32 version, bool debug) -{ - m_version = 
version; - m_debug = debug; - - if (!base_path.empty()) - { - m_pipeline_cache_filename = GetPipelineCacheBaseFileName(base_path, debug); - - const std::string base_filename = GetShaderCacheBaseFileName(base_path, debug); - const std::string index_filename = base_filename + ".idx"; - const std::string blob_filename = base_filename + ".bin"; - - if (!ReadExistingShaderCache(index_filename, blob_filename)) - CreateNewShaderCache(index_filename, blob_filename); - - if (!ReadExistingPipelineCache()) - CreateNewPipelineCache(); - } - else - { - CreateNewPipelineCache(); - } -} - -VkPipelineCache ShaderCache::GetPipelineCache(bool set_dirty /*= true*/) -{ - if (m_pipeline_cache == VK_NULL_HANDLE) - return VK_NULL_HANDLE; - - m_pipeline_cache_dirty |= set_dirty; - return m_pipeline_cache; -} - -bool ShaderCache::CreateNewShaderCache(const std::string& index_filename, const std::string& blob_filename) -{ - if (FileSystem::FileExists(index_filename.c_str())) - { - Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); - FileSystem::DeleteFile(index_filename.c_str()); - } - if (FileSystem::FileExists(blob_filename.c_str())) - { - Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); - FileSystem::DeleteFile(blob_filename.c_str()); - } - - m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); - if (!m_index_file) - { - Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str()); - return false; - } - - const u32 index_version = FILE_VERSION; - VK_PIPELINE_CACHE_HEADER header; - FillPipelineCacheHeader(&header); - - if (std::fwrite(&index_version, sizeof(index_version), 1, m_index_file) != 1 || - std::fwrite(&m_version, sizeof(m_version), 1, m_index_file) != 1 || - std::fwrite(&header, sizeof(header), 1, m_index_file) != 1) - { - Log_ErrorPrintf("Failed to write header to index file '%s'", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - 
FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b"); - if (!m_blob_file) - { - Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - FileSystem::DeleteFile(index_filename.c_str()); - return false; - } - - return true; -} - -bool ShaderCache::ReadExistingShaderCache(const std::string& index_filename, const std::string& blob_filename) -{ - m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b"); - if (!m_index_file) - return false; - - u32 file_version = 0; - u32 data_version = 0; - if (std::fread(&file_version, sizeof(file_version), 1, m_index_file) != 1 || file_version != FILE_VERSION || - std::fread(&data_version, sizeof(data_version), 1, m_index_file) != 1 || data_version != m_version) - { - Log_ErrorPrintf("Bad file/data version in '%s'", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - VK_PIPELINE_CACHE_HEADER header; - if (std::fread(&header, sizeof(header), 1, m_index_file) != 1 || !ValidatePipelineCacheHeader(header)) - { - Log_ErrorPrintf("Mismatched pipeline cache header in '%s' (GPU/driver changed?)", index_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b"); - if (!m_blob_file) - { - Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str()); - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - std::fseek(m_blob_file, 0, SEEK_END); - const u32 blob_file_size = static_cast(std::ftell(m_blob_file)); - - for (;;) - { - CacheIndexEntry entry; - if (std::fread(&entry, sizeof(entry), 1, m_index_file) != 1 || - (entry.file_offset + entry.blob_size) > blob_file_size) - { - if (std::feof(m_index_file)) - break; - - Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", 
index_filename.c_str()); - m_index.clear(); - std::fclose(m_blob_file); - m_blob_file = nullptr; - std::fclose(m_index_file); - m_index_file = nullptr; - return false; - } - - const CacheIndexKey key{entry.source_hash_low, entry.source_hash_high, entry.source_length, - static_cast(entry.shader_type)}; - const CacheIndexData data{entry.file_offset, entry.blob_size}; - m_index.emplace(key, data); - } - - // ensure we don't write before seeking - std::fseek(m_index_file, 0, SEEK_END); - - Log_InfoPrintf("Read %zu entries from '%s'", m_index.size(), index_filename.c_str()); - return true; -} - -void ShaderCache::CloseShaderCache() -{ - if (m_index_file) - { - std::fclose(m_index_file); - m_index_file = nullptr; - } - if (m_blob_file) - { - std::fclose(m_blob_file); - m_blob_file = nullptr; - } -} - -bool ShaderCache::CreateNewPipelineCache() -{ - if (!m_pipeline_cache_filename.empty() && FileSystem::FileExists(m_pipeline_cache_filename.c_str())) - { - Log_WarningPrintf("Removing existing pipeline cache '%s'", m_pipeline_cache_filename.c_str()); - FileSystem::DeleteFile(m_pipeline_cache_filename.c_str()); - } - - const VkPipelineCacheCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, nullptr, 0, 0, nullptr}; - VkResult res = vkCreatePipelineCache(g_vulkan_context->GetDevice(), &ci, nullptr, &m_pipeline_cache); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreatePipelineCache() failed: "); - return false; - } - - m_pipeline_cache_dirty = true; - return true; -} - -bool ShaderCache::ReadExistingPipelineCache() -{ - std::optional> data = FileSystem::ReadBinaryFile(m_pipeline_cache_filename.c_str()); - if (!data.has_value()) - return false; - - if (data->size() < sizeof(VK_PIPELINE_CACHE_HEADER)) - { - Log_ErrorPrintf("Pipeline cache at '%s' is too small", m_pipeline_cache_filename.c_str()); - return false; - } - - VK_PIPELINE_CACHE_HEADER header; - std::memcpy(&header, data->data(), sizeof(header)); - if (!ValidatePipelineCacheHeader(header)) - return 
false; - - const VkPipelineCacheCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, nullptr, 0, data->size(), - data->data()}; - VkResult res = vkCreatePipelineCache(g_vulkan_context->GetDevice(), &ci, nullptr, &m_pipeline_cache); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreatePipelineCache() failed: "); - return false; - } - - return true; -} - -bool ShaderCache::FlushPipelineCache() -{ - if (m_pipeline_cache == VK_NULL_HANDLE || !m_pipeline_cache_dirty || m_pipeline_cache_filename.empty()) - return false; - - size_t data_size; - VkResult res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() failed: "); - return false; - } - - std::vector data(data_size); - res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, data.data()); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() (2) failed: "); - return false; - } - - data.resize(data_size); - - // Save disk writes if it hasn't changed, think of the poor SSDs. 
- FILESYSTEM_STAT_DATA sd; - if (!FileSystem::StatFile(m_pipeline_cache_filename.c_str(), &sd) || sd.Size != static_cast(data_size)) - { - Log_InfoPrintf("Writing %zu bytes to '%s'", data_size, m_pipeline_cache_filename.c_str()); - if (!FileSystem::WriteBinaryFile(m_pipeline_cache_filename.c_str(), data.data(), data.size())) - { - Log_ErrorPrintf("Failed to write pipeline cache to '%s'", m_pipeline_cache_filename.c_str()); - return false; - } - } - else - { - Log_InfoPrintf("Skipping updating pipeline cache '%s' due to no changes.", m_pipeline_cache_filename.c_str()); - } - - m_pipeline_cache_dirty = false; - return true; -} - -void ShaderCache::ClosePipelineCache() -{ - if (m_pipeline_cache == VK_NULL_HANDLE) - return; - - vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); - m_pipeline_cache = VK_NULL_HANDLE; -} - -std::string ShaderCache::GetShaderCacheBaseFileName(const std::string_view& base_path, bool debug) -{ - std::string base_filename(base_path); - base_filename += FS_OSPATH_SEPARATOR_STR "vulkan_shaders"; - - if (debug) - base_filename += "_debug"; - - return base_filename; -} - -std::string ShaderCache::GetPipelineCacheBaseFileName(const std::string_view& base_path, bool debug) -{ - std::string base_filename(base_path); - base_filename += FS_OSPATH_SEPARATOR_STR "vulkan_pipelines"; - - if (debug) - base_filename += "_debug"; - - base_filename += ".bin"; - return base_filename; -} - -ShaderCache::CacheIndexKey ShaderCache::GetCacheKey(ShaderCompiler::Type type, const std::string_view& shader_code) -{ - union HashParts - { - struct - { - u64 hash_low; - u64 hash_high; - }; - u8 hash[16]; - }; - HashParts h; - - MD5Digest digest; - digest.Update(shader_code.data(), static_cast(shader_code.length())); - digest.Final(h.hash); - - return CacheIndexKey{h.hash_low, h.hash_high, static_cast(shader_code.length()), type}; -} - -std::optional ShaderCache::GetShaderSPV(ShaderCompiler::Type type, - std::string_view shader_code) -{ - 
const auto key = GetCacheKey(type, shader_code); - auto iter = m_index.find(key); - if (iter == m_index.end()) - return CompileAndAddShaderSPV(key, shader_code); - - SPIRVCodeVector spv(iter->second.blob_size); - if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 || - std::fread(spv.data(), sizeof(SPIRVCodeType), iter->second.blob_size, m_blob_file) != iter->second.blob_size) - { - Log_ErrorPrintf("Read blob from file failed, recompiling"); - return ShaderCompiler::CompileShader(type, shader_code, m_debug); - } - - return spv; -} - -VkShaderModule ShaderCache::GetShaderModule(ShaderCompiler::Type type, std::string_view shader_code) -{ - std::optional spv = GetShaderSPV(type, shader_code); - if (!spv.has_value()) - return VK_NULL_HANDLE; - - const VkShaderModuleCreateInfo ci{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, nullptr, 0, - spv->size() * sizeof(SPIRVCodeType), spv->data()}; - - VkShaderModule mod; - VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &ci, nullptr, &mod); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateShaderModule() failed: "); - return VK_NULL_HANDLE; - } - - return mod; -} - -VkShaderModule ShaderCache::GetVertexShader(std::string_view shader_code) -{ - return GetShaderModule(ShaderCompiler::Type::Vertex, std::move(shader_code)); -} - -VkShaderModule ShaderCache::GetGeometryShader(std::string_view shader_code) -{ - return GetShaderModule(ShaderCompiler::Type::Geometry, std::move(shader_code)); -} - -VkShaderModule ShaderCache::GetFragmentShader(std::string_view shader_code) -{ - return GetShaderModule(ShaderCompiler::Type::Fragment, std::move(shader_code)); -} - -VkShaderModule ShaderCache::GetComputeShader(std::string_view shader_code) -{ - return GetShaderModule(ShaderCompiler::Type::Compute, std::move(shader_code)); -} - -std::optional ShaderCache::CompileAndAddShaderSPV(const CacheIndexKey& key, - std::string_view shader_code) -{ - std::optional spv = 
ShaderCompiler::CompileShader(key.shader_type, shader_code, m_debug); - if (!spv.has_value()) - return {}; - - if (!m_blob_file || std::fseek(m_blob_file, 0, SEEK_END) != 0) - return spv; - - CacheIndexData data; - data.file_offset = static_cast(std::ftell(m_blob_file)); - data.blob_size = static_cast(spv->size()); - - CacheIndexEntry entry = {}; - entry.source_hash_low = key.source_hash_low; - entry.source_hash_high = key.source_hash_high; - entry.source_length = key.source_length; - entry.shader_type = static_cast(key.shader_type); - entry.blob_size = data.blob_size; - entry.file_offset = data.file_offset; - - if (std::fwrite(spv->data(), sizeof(SPIRVCodeType), entry.blob_size, m_blob_file) != entry.blob_size || - std::fflush(m_blob_file) != 0 || std::fwrite(&entry, sizeof(entry), 1, m_index_file) != 1 || - std::fflush(m_index_file) != 0) - { - Log_ErrorPrintf("Failed to write shader blob to file"); - return spv; - } - - m_index.emplace(key, data); - return spv; -} - -} // namespace Vulkan \ No newline at end of file diff --git a/src/common/vulkan/shader_cache.h b/src/common/vulkan/shader_cache.h deleted file mode 100644 index 8504b781e..000000000 --- a/src/common/vulkan/shader_cache.h +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "../hash_combine.h" -#include "../types.h" -#include "loader.h" -#include "shader_compiler.h" -#include -#include -#include -#include -#include -#include -#include - -namespace Vulkan { - -class ShaderCache -{ -public: - ~ShaderCache(); - - static void Create(std::string_view base_path, u32 version, bool debug); - static void Destroy(); - - /// Returns a handle to the pipeline cache. Set set_dirty to true if you are planning on writing to it externally. - VkPipelineCache GetPipelineCache(bool set_dirty = true); - - /// Writes pipeline cache to file, saving all newly compiled pipelines. 
- bool FlushPipelineCache(); - - std::optional GetShaderSPV(ShaderCompiler::Type type, std::string_view shader_code); - VkShaderModule GetShaderModule(ShaderCompiler::Type type, std::string_view shader_code); - - VkShaderModule GetVertexShader(std::string_view shader_code); - VkShaderModule GetGeometryShader(std::string_view shader_code); - VkShaderModule GetFragmentShader(std::string_view shader_code); - VkShaderModule GetComputeShader(std::string_view shader_code); - -private: - static constexpr u32 FILE_VERSION = 2; - - struct CacheIndexKey - { - u64 source_hash_low; - u64 source_hash_high; - u32 source_length; - ShaderCompiler::Type shader_type; - - bool operator==(const CacheIndexKey& key) const; - bool operator!=(const CacheIndexKey& key) const; - }; - - struct CacheIndexEntryHasher - { - std::size_t operator()(const CacheIndexKey& e) const noexcept - { - std::size_t h = 0; - hash_combine(h, e.source_hash_low, e.source_hash_high, e.source_length, e.shader_type); - return h; - } - }; - - struct CacheIndexData - { - u32 file_offset; - u32 blob_size; - }; - - using CacheIndex = std::unordered_map; - - ShaderCache(); - - static std::string GetShaderCacheBaseFileName(const std::string_view& base_path, bool debug); - static std::string GetPipelineCacheBaseFileName(const std::string_view& base_path, bool debug); - static CacheIndexKey GetCacheKey(ShaderCompiler::Type type, const std::string_view& shader_code); - - void Open(std::string_view base_path, u32 version, bool debug); - - bool CreateNewShaderCache(const std::string& index_filename, const std::string& blob_filename); - bool ReadExistingShaderCache(const std::string& index_filename, const std::string& blob_filename); - void CloseShaderCache(); - - bool CreateNewPipelineCache(); - bool ReadExistingPipelineCache(); - void ClosePipelineCache(); - - std::optional CompileAndAddShaderSPV(const CacheIndexKey& key, - std::string_view shader_code); - - std::FILE* m_index_file = nullptr; - std::FILE* m_blob_file = 
nullptr; - std::string m_pipeline_cache_filename; - - CacheIndex m_index; - - VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; - u32 m_version = 0; - bool m_debug = false; - bool m_pipeline_cache_dirty = false; -}; - -} // namespace Vulkan - -extern std::unique_ptr g_vulkan_shader_cache; diff --git a/src/common/vulkan/shader_compiler.cpp b/src/common/vulkan/shader_compiler.cpp deleted file mode 100644 index 664cb96b7..000000000 --- a/src/common/vulkan/shader_compiler.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "shader_compiler.h" -#include "../assert.h" -#include "../log.h" -#include "../string_util.h" -#include "util.h" -#include -#include -#include -Log_SetChannel(Vulkan::ShaderCompiler); - -// glslang includes -#include "SPIRV/GlslangToSpv.h" -#include "StandAlone/ResourceLimits.h" -#include "glslang/Public/ShaderLang.h" - -namespace Vulkan::ShaderCompiler { -// Registers itself for cleanup via atexit -bool InitializeGlslang(); - -static unsigned s_next_bad_shader_id = 1; - -static bool glslang_initialized = false; - -static std::optional CompileShaderToSPV(EShLanguage stage, const char* stage_filename, - std::string_view source) -{ - if (!InitializeGlslang()) - return std::nullopt; - - std::unique_ptr shader = std::make_unique(stage); - std::unique_ptr program; - glslang::TShader::ForbidIncluder includer; - EProfile profile = ECoreProfile; - EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); - int default_version = 450; - - std::string full_source_code; - const char* pass_source_code = source.data(); - int pass_source_code_length = static_cast(source.size()); - shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1); - - auto DumpBadShader = [&](const char* msg) { - std::string filename = StringUtil::StdStringFromFormat("bad_shader_%u.txt", s_next_bad_shader_id++); - 
Log::Writef("Vulkan", "CompileShaderToSPV", LOGLEVEL_ERROR, "%s, writing to %s", msg, filename.c_str()); - - std::ofstream ofs(filename.c_str(), std::ofstream::out | std::ofstream::binary); - if (ofs.is_open()) - { - ofs << source; - ofs << "\n"; - - ofs << msg << std::endl; - ofs << "Shader Info Log:" << std::endl; - ofs << shader->getInfoLog() << std::endl; - ofs << shader->getInfoDebugLog() << std::endl; - if (program) - { - ofs << "Program Info Log:" << std::endl; - ofs << program->getInfoLog() << std::endl; - ofs << program->getInfoDebugLog() << std::endl; - } - - ofs.close(); - } - }; - - if (!shader->parse(&glslang::DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) - { - DumpBadShader("Failed to parse shader"); - return std::nullopt; - } - - // Even though there's only a single shader, we still need to link it to generate SPV - program = std::make_unique(); - program->addShader(shader.get()); - if (!program->link(messages)) - { - DumpBadShader("Failed to link program"); - return std::nullopt; - } - - glslang::TIntermediate* intermediate = program->getIntermediate(stage); - if (!intermediate) - { - DumpBadShader("Failed to generate SPIR-V"); - return std::nullopt; - } - - SPIRVCodeVector out_code; - spv::SpvBuildLogger logger; - glslang::GlslangToSpv(*intermediate, out_code, &logger); - - // Write out messages - // Temporary: skip if it contains "Warning, version 450 is not yet complete; most version-specific - // features are present, but some are missing." 
- if (std::strlen(shader->getInfoLog()) > 108) - Log_WarningPrintf("Shader info log: %s", shader->getInfoLog()); - if (std::strlen(shader->getInfoDebugLog()) > 0) - Log_WarningPrintf("Shader debug info log: %s", shader->getInfoDebugLog()); - if (std::strlen(program->getInfoLog()) > 25) - Log_WarningPrintf("Program info log: %s", program->getInfoLog()); - if (std::strlen(program->getInfoDebugLog()) > 0) - Log_WarningPrintf("Program debug info log: %s", program->getInfoDebugLog()); - std::string spv_messages = logger.getAllMessages(); - if (!spv_messages.empty()) - Log_WarningPrintf("SPIR-V conversion messages: %s", spv_messages.c_str()); - - return out_code; -} - -bool InitializeGlslang() -{ - if (glslang_initialized) - return true; - - if (!glslang::InitializeProcess()) - { - Panic("Failed to initialize glslang shader compiler"); - return false; - } - - std::atexit([]() { glslang::FinalizeProcess(); }); - - glslang_initialized = true; - return true; -} - -void DeinitializeGlslang() -{ - if (!glslang_initialized) - return; - - glslang::FinalizeProcess(); - glslang_initialized = false; -} - -std::optional CompileVertexShader(std::string_view source_code) -{ - return CompileShaderToSPV(EShLangVertex, "vs", source_code); -} - -std::optional CompileGeometryShader(std::string_view source_code) -{ - return CompileShaderToSPV(EShLangGeometry, "gs", source_code); -} - -std::optional CompileFragmentShader(std::string_view source_code) -{ - return CompileShaderToSPV(EShLangFragment, "ps", source_code); -} - -std::optional CompileComputeShader(std::string_view source_code) -{ - return CompileShaderToSPV(EShLangCompute, "cs", source_code); -} - -std::optional CompileShader(Type type, std::string_view source_code, bool debug) -{ - switch (type) - { - case Type::Vertex: - return CompileShaderToSPV(EShLangVertex, "vs", source_code); - - case Type::Geometry: - return CompileShaderToSPV(EShLangGeometry, "gs", source_code); - - case Type::Fragment: - return 
CompileShaderToSPV(EShLangFragment, "ps", source_code); - - case Type::Compute: - return CompileShaderToSPV(EShLangCompute, "cs", source_code); - - default: - return std::nullopt; - } -} - -} // namespace Vulkan::ShaderCompiler diff --git a/src/common/vulkan/shader_compiler.h b/src/common/vulkan/shader_compiler.h deleted file mode 100644 index 8537a63ea..000000000 --- a/src/common/vulkan/shader_compiler.h +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#include "../types.h" -#include -#include -#include - -namespace Vulkan::ShaderCompiler { - -// Shader types -enum class Type -{ - Vertex, - Geometry, - Fragment, - Compute -}; - -void DeinitializeGlslang(); - -// SPIR-V compiled code type -using SPIRVCodeType = u32; -using SPIRVCodeVector = std::vector; - -// Compile a vertex shader to SPIR-V. -std::optional CompileVertexShader(std::string_view source_code); - -// Compile a geometry shader to SPIR-V. -std::optional CompileGeometryShader(std::string_view source_code); - -// Compile a fragment shader to SPIR-V. -std::optional CompileFragmentShader(std::string_view source_code); - -// Compile a compute shader to SPIR-V. 
-std::optional CompileComputeShader(std::string_view source_code); - -std::optional CompileShader(Type type, std::string_view source_code, bool debug); - -} // namespace Vulkan::ShaderCompiler diff --git a/src/common/vulkan/swap_chain.cpp b/src/common/vulkan/swap_chain.cpp deleted file mode 100644 index d9a54e0dd..000000000 --- a/src/common/vulkan/swap_chain.cpp +++ /dev/null @@ -1,890 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "swap_chain.h" -#include "../assert.h" -#include "../log.h" -#include "context.h" -#include "util.h" -#include -#include -#include -Log_SetChannel(Vulkan::SwapChain); - -#if defined(VK_USE_PLATFORM_XLIB_KHR) -#include -#endif - -#if defined(__APPLE__) -#include -#include - -static bool IsMainThread() -{ - Class clsNSThread = objc_getClass("NSThread"); - if (!clsNSThread) - return false; - - return reinterpret_cast(objc_msgSend)(clsNSThread, sel_getUid("isMainThread")); -} - -static bool CreateMetalLayer(WindowInfo* wi) -{ - if (!IsMainThread()) - { - struct MainThreadParams - { - WindowInfo* wi; - bool result; - }; - MainThreadParams params = {wi, false}; - dispatch_sync_f(dispatch_get_main_queue(), ¶ms, [](void* vparams) { - MainThreadParams* params = static_cast(vparams); - params->result = CreateMetalLayer(params->wi); - }); - return params.result; - } - - id view = reinterpret_cast(wi->window_handle); - - Class clsCAMetalLayer = objc_getClass("CAMetalLayer"); - if (!clsCAMetalLayer) - { - Log_ErrorPrint("Failed to get CAMetalLayer class."); - return false; - } - - // [CAMetalLayer layer] - id layer = reinterpret_cast(objc_msgSend)(clsCAMetalLayer, sel_getUid("layer")); - if (!layer) - { - Log_ErrorPrint("Failed to create Metal layer."); - return false; - } - - // [view setWantsLayer:YES] - reinterpret_cast(objc_msgSend)(view, sel_getUid("setWantsLayer:"), YES); - - // [view setLayer:layer] - reinterpret_cast(objc_msgSend)(view, 
sel_getUid("setLayer:"), layer); - - // NSScreen* screen = [NSScreen mainScreen] - id screen = reinterpret_cast(objc_msgSend)(objc_getClass("NSScreen"), sel_getUid("mainScreen")); - - // CGFloat factor = [screen backingScaleFactor] - double factor = reinterpret_cast(objc_msgSend)(screen, sel_getUid("backingScaleFactor")); - - // layer.contentsScale = factor - reinterpret_cast(objc_msgSend)(layer, sel_getUid("setContentsScale:"), factor); - - // Store the layer pointer, that way MoltenVK doesn't call [NSView layer] outside the main thread. - wi->surface_handle = layer; - return true; -} - -static void DestroyMetalLayer(WindowInfo* wi) -{ - if (!IsMainThread()) - { - dispatch_sync_f(dispatch_get_main_queue(), wi, [](void* wi) { DestroyMetalLayer(static_cast(wi)); }); - return; - } - - id view = reinterpret_cast(wi->window_handle); - id layer = reinterpret_cast(wi->surface_handle); - if (layer == nil) - return; - - reinterpret_cast(objc_msgSend)(view, sel_getUid("setLayer:"), nil); - reinterpret_cast(objc_msgSend)(view, sel_getUid("setWantsLayer:"), NO); - wi->surface_handle = nullptr; -} - -#endif - -namespace Vulkan { -SwapChain::SwapChain(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync) - : m_window_info(wi), m_surface(surface), m_vsync_enabled(vsync) -{ -} - -SwapChain::~SwapChain() -{ - DestroySemaphores(); - DestroySwapChainImages(); - DestroySwapChain(); - DestroySurface(); -} - -static VkSurfaceKHR CreateDisplaySurface(VkInstance instance, VkPhysicalDevice physical_device, WindowInfo* wi) -{ - Log_InfoPrintf("Trying to create a VK_KHR_display surface of %ux%u", wi->surface_width, wi->surface_height); - - u32 num_displays; - VkResult res = vkGetPhysicalDeviceDisplayPropertiesKHR(physical_device, &num_displays, nullptr); - if (res != VK_SUCCESS || num_displays == 0) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceDisplayPropertiesKHR() failed:"); - return {}; - } - - std::vector displays(num_displays); - res = 
vkGetPhysicalDeviceDisplayPropertiesKHR(physical_device, &num_displays, displays.data()); - if (res != VK_SUCCESS || num_displays != displays.size()) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceDisplayPropertiesKHR() failed:"); - return {}; - } - - for (u32 display_index = 0; display_index < num_displays; display_index++) - { - const VkDisplayPropertiesKHR& props = displays[display_index]; - Log_DevPrintf("Testing display '%s'", props.displayName); - - u32 num_modes; - res = vkGetDisplayModePropertiesKHR(physical_device, props.display, &num_modes, nullptr); - if (res != VK_SUCCESS || num_modes == 0) - { - LOG_VULKAN_ERROR(res, "vkGetDisplayModePropertiesKHR() failed:"); - continue; - } - - std::vector modes(num_modes); - res = vkGetDisplayModePropertiesKHR(physical_device, props.display, &num_modes, modes.data()); - if (res != VK_SUCCESS || num_modes != modes.size()) - { - LOG_VULKAN_ERROR(res, "vkGetDisplayModePropertiesKHR() failed:"); - continue; - } - - const VkDisplayModePropertiesKHR* matched_mode = nullptr; - for (const VkDisplayModePropertiesKHR& mode : modes) - { - const float refresh_rate = static_cast(mode.parameters.refreshRate) / 1000.0f; - Log_DevPrintf(" Mode %ux%u @ %f", mode.parameters.visibleRegion.width, mode.parameters.visibleRegion.height, - refresh_rate); - - if (!matched_mode && - ((wi->surface_width == 0 && wi->surface_height == 0) || - (mode.parameters.visibleRegion.width == wi->surface_width && - mode.parameters.visibleRegion.height == wi->surface_height && - (wi->surface_refresh_rate == 0.0f || std::abs(refresh_rate - wi->surface_refresh_rate) < 0.1f)))) - { - matched_mode = &mode; - } - } - - if (!matched_mode) - { - Log_DevPrintf("No modes matched on '%s'", props.displayName); - continue; - } - - u32 num_planes; - res = vkGetPhysicalDeviceDisplayPlanePropertiesKHR(physical_device, &num_planes, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceDisplayPlanePropertiesKHR() failed:"); - continue; - } - 
if (num_planes == 0) - continue; - - std::vector planes(num_planes); - res = vkGetPhysicalDeviceDisplayPlanePropertiesKHR(physical_device, &num_planes, planes.data()); - if (res != VK_SUCCESS || num_planes != planes.size()) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceDisplayPlanePropertiesKHR() failed:"); - continue; - } - - u32 plane_index = 0; - for (; plane_index < num_planes; plane_index++) - { - u32 supported_display_count; - res = vkGetDisplayPlaneSupportedDisplaysKHR(physical_device, plane_index, &supported_display_count, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetDisplayPlaneSupportedDisplaysKHR() failed:"); - continue; - } - if (supported_display_count == 0) - continue; - - std::vector supported_displays(supported_display_count); - res = vkGetDisplayPlaneSupportedDisplaysKHR(physical_device, plane_index, &supported_display_count, - supported_displays.data()); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetDisplayPlaneSupportedDisplaysKHR() failed:"); - continue; - } - - const bool is_supported = - std::find(supported_displays.begin(), supported_displays.end(), props.display) != supported_displays.end(); - if (!is_supported) - continue; - - break; - } - - if (plane_index == num_planes) - { - Log_DevPrintf("No planes matched on '%s'", props.displayName); - continue; - } - - VkDisplaySurfaceCreateInfoKHR info = {}; - info.sType = VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR; - info.displayMode = matched_mode->displayMode; - info.planeIndex = plane_index; - info.planeStackIndex = planes[plane_index].currentStackIndex; - info.transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - info.globalAlpha = 1.0f; - info.alphaMode = VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR; - info.imageExtent = matched_mode->parameters.visibleRegion; - - VkSurfaceKHR surface; - res = vkCreateDisplayPlaneSurfaceKHR(instance, &info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDisplayPlaneSurfaceKHR() failed: 
"); - continue; - } - - wi->surface_refresh_rate = static_cast(matched_mode->parameters.refreshRate) / 1000.0f; - return surface; - } - - return VK_NULL_HANDLE; -} - -static std::vector GetDisplayModes(VkInstance instance, VkPhysicalDevice physical_device, - const WindowInfo& wi) -{ - - u32 num_displays; - VkResult res = vkGetPhysicalDeviceDisplayPropertiesKHR(physical_device, &num_displays, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceDisplayPropertiesKHR() failed:"); - return {}; - } - if (num_displays == 0) - { - Log_ErrorPrint("No displays were returned"); - return {}; - } - - std::vector displays(num_displays); - res = vkGetPhysicalDeviceDisplayPropertiesKHR(physical_device, &num_displays, displays.data()); - if (res != VK_SUCCESS || num_displays != displays.size()) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceDisplayPropertiesKHR() failed:"); - return {}; - } - - std::vector result; - for (u32 display_index = 0; display_index < num_displays; display_index++) - { - const VkDisplayPropertiesKHR& props = displays[display_index]; - - u32 num_modes; - res = vkGetDisplayModePropertiesKHR(physical_device, props.display, &num_modes, nullptr); - if (res != VK_SUCCESS || num_modes == 0) - { - LOG_VULKAN_ERROR(res, "vkGetDisplayModePropertiesKHR() failed:"); - continue; - } - - std::vector modes(num_modes); - res = vkGetDisplayModePropertiesKHR(physical_device, props.display, &num_modes, modes.data()); - if (res != VK_SUCCESS || num_modes != modes.size()) - { - LOG_VULKAN_ERROR(res, "vkGetDisplayModePropertiesKHR() failed:"); - continue; - } - - for (const VkDisplayModePropertiesKHR& mode : modes) - { - const float refresh_rate = static_cast(mode.parameters.refreshRate) / 1000.0f; - if (std::find_if(result.begin(), result.end(), [&mode, refresh_rate](const SwapChain::FullscreenModeInfo& mi) { - return (mi.width == mode.parameters.visibleRegion.width && - mi.height == mode.parameters.visibleRegion.height && 
mode.parameters.refreshRate == refresh_rate); - }) != result.end()) - { - continue; - } - - result.push_back(SwapChain::FullscreenModeInfo{static_cast(mode.parameters.visibleRegion.width), - static_cast(mode.parameters.visibleRegion.height), - refresh_rate}); - } - } - - return result; -} - -VkSurfaceKHR SwapChain::CreateVulkanSurface(VkInstance instance, VkPhysicalDevice physical_device, WindowInfo* wi) -{ -#if defined(VK_USE_PLATFORM_WIN32_KHR) - if (wi->type == WindowInfo::Type::Win32) - { - VkWin32SurfaceCreateInfoKHR surface_create_info = { - VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkWin32SurfaceCreateFlagsKHR flags - nullptr, // HINSTANCE hinstance - reinterpret_cast(wi->window_handle) // HWND hwnd - }; - - VkSurfaceKHR surface; - VkResult res = vkCreateWin32SurfaceKHR(instance, &surface_create_info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateWin32SurfaceKHR failed: "); - return VK_NULL_HANDLE; - } - - return surface; - } -#endif - -#if defined(VK_USE_PLATFORM_XLIB_KHR) - if (wi->type == WindowInfo::Type::X11) - { - VkXlibSurfaceCreateInfoKHR surface_create_info = { - VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkXlibSurfaceCreateFlagsKHR flags - static_cast(wi->display_connection), // Display* dpy - reinterpret_cast(wi->window_handle) // Window window - }; - - VkSurfaceKHR surface; - VkResult res = vkCreateXlibSurfaceKHR(instance, &surface_create_info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateXlibSurfaceKHR failed: "); - return VK_NULL_HANDLE; - } - - return surface; - } -#endif - -#if defined(VK_USE_PLATFORM_WAYLAND_KHR) - if (wi->type == WindowInfo::Type::Wayland) - { - VkWaylandSurfaceCreateInfoKHR surface_create_info = {VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, - static_cast(wi->display_connection), - 
static_cast(wi->window_handle)}; - - VkSurfaceKHR surface; - VkResult res = vkCreateWaylandSurfaceKHR(instance, &surface_create_info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateWaylandSurfaceEXT failed: "); - return VK_NULL_HANDLE; - } - - return surface; - } -#endif - -#if defined(VK_USE_PLATFORM_ANDROID_KHR) - if (wi->type == WindowInfo::Type::Android) - { - VkAndroidSurfaceCreateInfoKHR surface_create_info = { - VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkAndroidSurfaceCreateFlagsKHR flags - reinterpret_cast(wi->window_handle) // ANativeWindow* window - }; - - VkSurfaceKHR surface; - VkResult res = vkCreateAndroidSurfaceKHR(instance, &surface_create_info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateAndroidSurfaceKHR failed: "); - return VK_NULL_HANDLE; - } - - return surface; - } -#endif - -#if defined(VK_USE_PLATFORM_METAL_EXT) - if (wi->type == WindowInfo::Type::MacOS) - { - if (!wi->surface_handle && !CreateMetalLayer(wi)) - return VK_NULL_HANDLE; - - VkMetalSurfaceCreateInfoEXT surface_create_info = {VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT, nullptr, 0, - static_cast(wi->surface_handle)}; - - VkSurfaceKHR surface; - VkResult res = vkCreateMetalSurfaceEXT(instance, &surface_create_info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateMetalSurfaceEXT failed: "); - return VK_NULL_HANDLE; - } - - return surface; - } -#elif defined(VK_USE_PLATFORM_MACOS_MVK) - if (wi->type == WindowInfo::Type::MacOS) - { - VkMacOSSurfaceCreateInfoMVK surface_create_info = {VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK, nullptr, 0, - wi->window_handle}; - - VkSurfaceKHR surface; - VkResult res = vkCreateMacOSSurfaceMVK(instance, &surface_create_info, nullptr, &surface); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateMacOSSurfaceMVK failed: "); - return VK_NULL_HANDLE; - } - - 
return surface; - } -#endif - - if (wi->type == WindowInfo::Type::Display) - return CreateDisplaySurface(instance, physical_device, wi); - - return VK_NULL_HANDLE; -} - -void SwapChain::DestroyVulkanSurface(VkInstance instance, WindowInfo* wi, VkSurfaceKHR surface) -{ - vkDestroySurfaceKHR(g_vulkan_context->GetVulkanInstance(), surface, nullptr); - -#if defined(__APPLE__) - if (wi->type == WindowInfo::Type::MacOS && wi->surface_handle) - DestroyMetalLayer(wi); -#endif -} - -std::vector -SwapChain::GetSurfaceFullscreenModes(VkInstance instance, VkPhysicalDevice physical_device, const WindowInfo& wi) -{ - if (wi.type == WindowInfo::Type::Display) - return GetDisplayModes(instance, physical_device, wi); - - return {}; -} - -std::unique_ptr SwapChain::Create(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync) -{ - std::unique_ptr swap_chain = std::make_unique(wi, surface, vsync); - if (!swap_chain->CreateSwapChain() || !swap_chain->SetupSwapChainImages() || !swap_chain->CreateSemaphores()) - return nullptr; - - return swap_chain; -} - -bool SwapChain::SelectSurfaceFormat() -{ - u32 format_count; - VkResult res = - vkGetPhysicalDeviceSurfaceFormatsKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, &format_count, nullptr); - if (res != VK_SUCCESS || format_count == 0) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceFormatsKHR failed: "); - return false; - } - - std::vector surface_formats(format_count); - res = vkGetPhysicalDeviceSurfaceFormatsKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, &format_count, - surface_formats.data()); - Assert(res == VK_SUCCESS); - - // If there is a single undefined surface format, the device doesn't care, so we'll just use RGBA - if (surface_formats[0].format == VK_FORMAT_UNDEFINED) - { - m_surface_format.format = VK_FORMAT_R8G8B8A8_UNORM; - m_surface_format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; - return true; - } - - // Try to find a suitable format. 
- for (const VkSurfaceFormatKHR& surface_format : surface_formats) - { - // Some drivers seem to return a SRGB format here (Intel Mesa). - // This results in gamma correction when presenting to the screen, which we don't want. - // Use a linear format instead, if this is the case. - m_surface_format.format = Util::GetLinearFormat(surface_format.format); - m_surface_format.colorSpace = surface_format.colorSpace; - return true; - } - - Panic("Failed to find a suitable format for swap chain buffers."); - return false; -} - -bool SwapChain::SelectPresentMode() -{ - VkResult res; - u32 mode_count; - res = - vkGetPhysicalDeviceSurfacePresentModesKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, &mode_count, nullptr); - if (res != VK_SUCCESS || mode_count == 0) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceFormatsKHR failed: "); - return false; - } - - std::vector present_modes(mode_count); - res = vkGetPhysicalDeviceSurfacePresentModesKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, &mode_count, - present_modes.data()); - Assert(res == VK_SUCCESS); - - // Checks if a particular mode is supported, if it is, returns that mode. - auto CheckForMode = [&present_modes](VkPresentModeKHR check_mode) { - auto it = std::find_if(present_modes.begin(), present_modes.end(), - [check_mode](VkPresentModeKHR mode) { return check_mode == mode; }); - return it != present_modes.end(); - }; - - // If vsync is enabled, use VK_PRESENT_MODE_FIFO_KHR. - // This check should not fail with conforming drivers, as the FIFO present mode is mandated by - // the specification (VK_KHR_swapchain). In case it isn't though, fall through to any other mode. - if (m_vsync_enabled && CheckForMode(VK_PRESENT_MODE_FIFO_KHR)) - { - m_present_mode = VK_PRESENT_MODE_FIFO_KHR; - return true; - } - - // Prefer screen-tearing, if possible, for lowest latency. 
- if (CheckForMode(VK_PRESENT_MODE_IMMEDIATE_KHR)) - { - m_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; - return true; - } - - // Use optimized-vsync above vsync. - if (CheckForMode(VK_PRESENT_MODE_MAILBOX_KHR)) - { - m_present_mode = VK_PRESENT_MODE_MAILBOX_KHR; - return true; - } - - // Fall back to whatever is available. - m_present_mode = present_modes[0]; - return true; -} - -bool SwapChain::CreateSwapChain() -{ - // Look up surface properties to determine image count and dimensions - VkSurfaceCapabilitiesKHR surface_capabilities; - VkResult res = - vkGetPhysicalDeviceSurfaceCapabilitiesKHR(g_vulkan_context->GetPhysicalDevice(), m_surface, &surface_capabilities); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR failed: "); - return false; - } - - // Select swap chain format and present mode - if (!SelectSurfaceFormat() || !SelectPresentMode()) - return false; - - // Select number of images in swap chain, we prefer one buffer in the background to work on - u32 image_count = std::max(surface_capabilities.minImageCount, 2u); - - // maxImageCount can be zero, in which case there isn't an upper limit on the number of buffers. - if (surface_capabilities.maxImageCount > 0) - image_count = std::min(image_count, surface_capabilities.maxImageCount); - - // Determine the dimensions of the swap chain. Values of -1 indicate the size we specify here - // determines window size? 
- VkExtent2D size = surface_capabilities.currentExtent; -#ifndef ANDROID - if (size.width == UINT32_MAX) -#endif - { - size.width = m_window_info.surface_width; - size.height = m_window_info.surface_height; - } - size.width = - std::clamp(size.width, surface_capabilities.minImageExtent.width, surface_capabilities.maxImageExtent.width); - size.height = - std::clamp(size.height, surface_capabilities.minImageExtent.height, surface_capabilities.maxImageExtent.height); - - // Prefer identity transform if possible - VkSurfaceTransformFlagBitsKHR transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - if (!(surface_capabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)) - transform = surface_capabilities.currentTransform; - - // Select swap chain flags, we only need a colour attachment - VkImageUsageFlags image_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - if (!(surface_capabilities.supportedUsageFlags & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) - { - Log_ErrorPrintf("Vulkan: Swap chain does not support usage as color attachment"); - return false; - } - - // Store the old/current swap chain when recreating for resize - VkSwapchainKHR old_swap_chain = m_swap_chain; - m_swap_chain = VK_NULL_HANDLE; - - // Now we can actually create the swap chain - VkSwapchainCreateInfoKHR swap_chain_info = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, - nullptr, - 0, - m_surface, - image_count, - m_surface_format.format, - m_surface_format.colorSpace, - size, - 1u, - image_usage, - VK_SHARING_MODE_EXCLUSIVE, - 0, - nullptr, - transform, - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, - m_present_mode, - VK_TRUE, - old_swap_chain}; - std::array indices = {{ - g_vulkan_context->GetGraphicsQueueFamilyIndex(), - g_vulkan_context->GetPresentQueueFamilyIndex(), - }}; - if (g_vulkan_context->GetGraphicsQueueFamilyIndex() != g_vulkan_context->GetPresentQueueFamilyIndex()) - { - swap_chain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; - swap_chain_info.queueFamilyIndexCount = 2; - 
swap_chain_info.pQueueFamilyIndices = indices.data(); - } - - if (m_swap_chain == VK_NULL_HANDLE) - { - res = vkCreateSwapchainKHR(g_vulkan_context->GetDevice(), &swap_chain_info, nullptr, &m_swap_chain); - } - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateSwapchainKHR failed: "); - return false; - } - - // Now destroy the old swap chain, since it's been recreated. - // We can do this immediately since all work should have been completed before calling resize. - if (old_swap_chain != VK_NULL_HANDLE) - vkDestroySwapchainKHR(g_vulkan_context->GetDevice(), old_swap_chain, nullptr); - - m_window_info.surface_width = std::max(1u, size.width); - m_window_info.surface_height = std::max(1u, size.height); - return true; -} - -bool SwapChain::SetupSwapChainImages() -{ - Assert(m_images.empty()); - - u32 image_count; - VkResult res = vkGetSwapchainImagesKHR(g_vulkan_context->GetDevice(), m_swap_chain, &image_count, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetSwapchainImagesKHR failed: "); - return false; - } - - std::vector images(image_count); - res = vkGetSwapchainImagesKHR(g_vulkan_context->GetDevice(), m_swap_chain, &image_count, images.data()); - Assert(res == VK_SUCCESS); - - m_load_render_pass = g_vulkan_context->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, - VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_LOAD); - m_clear_render_pass = g_vulkan_context->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, - VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); - if (m_load_render_pass == VK_NULL_HANDLE || m_clear_render_pass == VK_NULL_HANDLE) - { - Panic("Failed to get swap chain render passes."); - return false; - } - - m_images.reserve(image_count); - for (u32 i = 0; i < image_count; i++) - { - SwapChainImage image; - image.image = images[i]; - - // Create texture object, which creates a view of the backbuffer - if (!image.texture.Adopt(image.image, VK_IMAGE_VIEW_TYPE_2D, m_window_info.surface_width, - 
m_window_info.surface_height, 1, 1, m_surface_format.format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_LAYOUT_UNDEFINED)) - { - return false; - } - - image.framebuffer = image.texture.CreateFramebuffer(m_load_render_pass); - if (image.framebuffer == VK_NULL_HANDLE) - return false; - - m_images.emplace_back(std::move(image)); - } - - return true; -} - -void SwapChain::DestroySwapChainImages() -{ - for (auto& it : m_images) - { - // Images themselves are cleaned up by the swap chain object - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), it.framebuffer, nullptr); - } - m_images.clear(); -} - -void SwapChain::DestroySwapChain() -{ - if (m_swap_chain == VK_NULL_HANDLE) - return; - - vkDestroySwapchainKHR(g_vulkan_context->GetDevice(), m_swap_chain, nullptr); - m_swap_chain = VK_NULL_HANDLE; -} - -VkResult SwapChain::AcquireNextImage() -{ - if (!m_swap_chain) - return VK_ERROR_SURFACE_LOST_KHR; - - return vkAcquireNextImageKHR(g_vulkan_context->GetDevice(), m_swap_chain, UINT64_MAX, m_image_available_semaphore, - VK_NULL_HANDLE, &m_current_image); -} - -bool SwapChain::ResizeSwapChain(u32 new_width /* = 0 */, u32 new_height /* = 0 */) -{ - DestroySwapChainImages(); - - if (new_width != 0 && new_height != 0) - { - m_window_info.surface_width = new_width; - m_window_info.surface_height = new_height; - } - - if (!CreateSwapChain() || !SetupSwapChainImages()) - { - DestroySwapChainImages(); - DestroySwapChain(); - return false; - } - - return true; -} - -bool SwapChain::RecreateSwapChain() -{ - DestroySwapChainImages(); - - if (!CreateSwapChain() || !SetupSwapChainImages()) - { - DestroySwapChainImages(); - DestroySwapChain(); - return false; - } - - return true; -} - -bool SwapChain::SetVSync(bool enabled) -{ - if (m_vsync_enabled == enabled) - return true; - - // Recreate the swap chain with the new present mode. 
- m_vsync_enabled = enabled; - return RecreateSwapChain(); -} - -bool SwapChain::RecreateSurface(const WindowInfo& new_wi) -{ - // Destroy the old swap chain, images, and surface. - DestroySwapChainImages(); - DestroySwapChain(); - DestroySurface(); - - // Re-create the surface with the new native handle - m_window_info = new_wi; - m_surface = - CreateVulkanSurface(g_vulkan_context->GetVulkanInstance(), g_vulkan_context->GetPhysicalDevice(), &m_window_info); - if (m_surface == VK_NULL_HANDLE) - return false; - - // The validation layers get angry at us if we don't call this before creating the swapchain. - VkBool32 present_supported = VK_TRUE; - VkResult res = - vkGetPhysicalDeviceSurfaceSupportKHR(g_vulkan_context->GetPhysicalDevice(), - g_vulkan_context->GetPresentQueueFamilyIndex(), m_surface, &present_supported); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); - return false; - } - if (!present_supported) - { - Panic("Recreated surface does not support presenting."); - return false; - } - - // Finally re-create the swap chain - if (!CreateSwapChain() || !SetupSwapChainImages()) - return false; - - return true; -} - -void SwapChain::DestroySurface() -{ - if (m_surface == VK_NULL_HANDLE) - return; - - DestroyVulkanSurface(g_vulkan_context->GetVulkanInstance(), &m_window_info, m_surface); - m_surface = VK_NULL_HANDLE; -} - -bool SwapChain::CreateSemaphores() -{ - // Create two semaphores, one that is triggered when the swapchain buffer is ready, another after - // submit and before present - VkSemaphoreCreateInfo semaphore_info = { - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0 // VkSemaphoreCreateFlags flags - }; - - VkResult res; - if ((res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, - &m_image_available_semaphore)) != VK_SUCCESS || - (res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, - 
&m_rendering_finished_semaphore)) != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); - return false; - } - - return true; -} - -void SwapChain::DestroySemaphores() -{ - if (m_image_available_semaphore != VK_NULL_HANDLE) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), m_image_available_semaphore, nullptr); - m_image_available_semaphore = VK_NULL_HANDLE; - } - - if (m_rendering_finished_semaphore != VK_NULL_HANDLE) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), m_rendering_finished_semaphore, nullptr); - m_rendering_finished_semaphore = VK_NULL_HANDLE; - } -} - -} // namespace Vulkan diff --git a/src/common/vulkan/swap_chain.h b/src/common/vulkan/swap_chain.h deleted file mode 100644 index c7ef1c99e..000000000 --- a/src/common/vulkan/swap_chain.h +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#include "../types.h" -#include "../window_info.h" -#include "texture.h" -#include "loader.h" -#include -#include - -namespace Vulkan { - -class SwapChain -{ -public: - SwapChain(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync); - ~SwapChain(); - - // Creates a vulkan-renderable surface for the specified window handle. - static VkSurfaceKHR CreateVulkanSurface(VkInstance instance, VkPhysicalDevice physical_device, WindowInfo* wi); - - // Destroys a previously-created surface. - static void DestroyVulkanSurface(VkInstance instance, WindowInfo* wi, VkSurfaceKHR surface); - - // Enumerates fullscreen modes for window info. - struct FullscreenModeInfo - { - u32 width; - u32 height; - float refresh_rate; - }; - static std::vector - GetSurfaceFullscreenModes(VkInstance instance, VkPhysicalDevice physical_device, const WindowInfo& wi); - - // Create a new swap chain from a pre-existing surface. 
- static std::unique_ptr Create(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync); - - ALWAYS_INLINE VkSurfaceKHR GetSurface() const { return m_surface; } - ALWAYS_INLINE VkSurfaceFormatKHR GetSurfaceFormat() const { return m_surface_format; } - ALWAYS_INLINE VkFormat GetTextureFormat() const { return m_surface_format.format; } - ALWAYS_INLINE bool IsVSyncEnabled() const { return m_vsync_enabled; } - ALWAYS_INLINE VkSwapchainKHR GetSwapChain() const { return m_swap_chain; } - ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; } - ALWAYS_INLINE u32 GetWidth() const { return m_window_info.surface_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_window_info.surface_height; } - ALWAYS_INLINE u32 GetCurrentImageIndex() const { return m_current_image; } - ALWAYS_INLINE u32 GetImageCount() const { return static_cast(m_images.size()); } - ALWAYS_INLINE VkImage GetCurrentImage() const { return m_images[m_current_image].image; } - ALWAYS_INLINE const Texture& GetCurrentTexture() const { return m_images[m_current_image].texture; } - ALWAYS_INLINE Texture& GetCurrentTexture() { return m_images[m_current_image].texture; } - ALWAYS_INLINE VkFramebuffer GetCurrentFramebuffer() const { return m_images[m_current_image].framebuffer; } - ALWAYS_INLINE VkRenderPass GetLoadRenderPass() const { return m_load_render_pass; } - ALWAYS_INLINE VkRenderPass GetClearRenderPass() const { return m_clear_render_pass; } - ALWAYS_INLINE VkSemaphore GetImageAvailableSemaphore() const { return m_image_available_semaphore; } - ALWAYS_INLINE VkSemaphore GetRenderingFinishedSemaphore() const { return m_rendering_finished_semaphore; } - VkResult AcquireNextImage(); - - bool RecreateSurface(const WindowInfo& new_wi); - bool ResizeSwapChain(u32 new_width = 0, u32 new_height = 0); - bool RecreateSwapChain(); - - // Change vsync enabled state. This may fail as it causes a swapchain recreation. 
- bool SetVSync(bool enabled); - -private: - bool SelectSurfaceFormat(); - bool SelectPresentMode(); - - bool CreateSwapChain(); - void DestroySwapChain(); - - bool SetupSwapChainImages(); - void DestroySwapChainImages(); - - void DestroySurface(); - - bool CreateSemaphores(); - void DestroySemaphores(); - - struct SwapChainImage - { - VkImage image; - Texture texture; - VkFramebuffer framebuffer; - }; - - WindowInfo m_window_info; - - VkSurfaceKHR m_surface = VK_NULL_HANDLE; - VkSurfaceFormatKHR m_surface_format = {}; - VkPresentModeKHR m_present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR; - - VkRenderPass m_load_render_pass = VK_NULL_HANDLE; - VkRenderPass m_clear_render_pass = VK_NULL_HANDLE; - - VkSemaphore m_image_available_semaphore = VK_NULL_HANDLE; - VkSemaphore m_rendering_finished_semaphore = VK_NULL_HANDLE; - - VkSwapchainKHR m_swap_chain = VK_NULL_HANDLE; - std::vector m_images; - u32 m_current_image = 0; - bool m_vsync_enabled = false; -}; - -} // namespace Vulkan diff --git a/src/common/vulkan/texture.cpp b/src/common/vulkan/texture.cpp deleted file mode 100644 index 8ded320b7..000000000 --- a/src/common/vulkan/texture.cpp +++ /dev/null @@ -1,534 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "texture.h" -#include "../align.h" -#include "../assert.h" -#include "../log.h" -#include "../string_util.h" -#include "context.h" -#include "util.h" -#include -Log_SetChannel(Texture); - -static constexpr std::array(GPUTexture::Format::Count)> s_vk_mapping = { - {VK_FORMAT_UNDEFINED, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R5G6B5_UNORM_PACK16, - VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R8_UNORM, VK_FORMAT_D16_UNORM}}; - -static constexpr VkComponentMapping s_identity_swizzle{VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - -Vulkan::Texture::Texture() = default; - 
-Vulkan::Texture::Texture(Texture&& move) - : m_view_type(move.m_view_type), m_layout(move.m_layout), m_image(move.m_image), m_allocation(move.m_allocation), - m_view(move.m_view) -{ - m_width = move.m_width; - m_height = move.m_height; - m_layers = move.m_layers; - m_levels = move.m_levels; - m_samples = move.m_samples; - - move.ClearBaseProperties(); - move.m_view_type = VK_IMAGE_VIEW_TYPE_2D; - move.m_layout = VK_IMAGE_LAYOUT_UNDEFINED; - move.m_image = VK_NULL_HANDLE; - move.m_allocation = VK_NULL_HANDLE; - move.m_view = VK_NULL_HANDLE; -} - -Vulkan::Texture::~Texture() -{ - if (IsValid()) - Destroy(true); -} - -VkFormat Vulkan::Texture::GetVkFormat(Format format) -{ - return s_vk_mapping[static_cast(format)]; -} - -GPUTexture::Format Vulkan::Texture::LookupBaseFormat(VkFormat vformat) -{ - for (u32 i = 0; i < static_cast(s_vk_mapping.size()); i++) - { - if (s_vk_mapping[i] == vformat) - return static_cast(i); - } - return GPUTexture::Format::Unknown; -} - -bool Vulkan::Texture::IsValid() const -{ - return (m_image != VK_NULL_HANDLE); -} - -Vulkan::Texture& Vulkan::Texture::operator=(Texture&& move) -{ - if (IsValid()) - Destroy(true); - - std::swap(m_width, move.m_width); - std::swap(m_height, move.m_height); - std::swap(m_levels, move.m_levels); - std::swap(m_layers, move.m_layers); - std::swap(m_format, move.m_format); - std::swap(m_samples, move.m_samples); - std::swap(m_view_type, move.m_view_type); - std::swap(m_layout, move.m_layout); - std::swap(m_image, move.m_image); - std::swap(m_allocation, move.m_allocation); - std::swap(m_view, move.m_view); - - return *this; -} - -bool Vulkan::Texture::Create(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, - VkSampleCountFlagBits samples, VkImageViewType view_type, VkImageTiling tiling, - VkImageUsageFlags usage, bool dedicated_memory /* = false */, - const VkComponentMapping* swizzle /* = nullptr */) -{ - const VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - nullptr, - 0, - 
VK_IMAGE_TYPE_2D, - format, - {width, height, 1}, - levels, - layers, - samples, - tiling, - usage, - VK_SHARING_MODE_EXCLUSIVE, - 0, - nullptr, - VK_IMAGE_LAYOUT_UNDEFINED}; - - VmaAllocationCreateInfo aci = {}; - aci.usage = VMA_MEMORY_USAGE_GPU_ONLY; - aci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT; - aci.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - if (dedicated_memory) - aci.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; - - VkImage image = VK_NULL_HANDLE; - VmaAllocation allocation = VK_NULL_HANDLE; - VkResult res = vmaCreateImage(g_vulkan_context->GetAllocator(), &image_info, &aci, &image, &allocation, nullptr); - if (res != VK_SUCCESS && dedicated_memory) - { - // try without dedicated memory - aci.flags &= ~VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; - res = vmaCreateImage(g_vulkan_context->GetAllocator(), &image_info, &aci, &image, &allocation, nullptr); - } - if (res == VK_ERROR_OUT_OF_DEVICE_MEMORY) - { - Log_WarningPrintf("Failed to allocate device memory for %ux%u texture", width, height); - return false; - } - else if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vmaCreateImage failed: "); - return false; - } - - const VkImageViewCreateInfo view_info = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - image, - view_type, - format, - swizzle ? *swizzle : s_identity_swizzle, - {Util::IsDepthFormat(format) ? 
- static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : - static_cast(VK_IMAGE_ASPECT_COLOR_BIT), - 0, levels, 0, layers}}; - - VkImageView view = VK_NULL_HANDLE; - res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); - vmaDestroyImage(g_vulkan_context->GetAllocator(), image, allocation); - return false; - } - - if (IsValid()) - Destroy(true); - - m_width = static_cast(width); - m_height = static_cast(height); - m_levels = static_cast(levels); - m_layers = static_cast(layers); - m_samples = static_cast(samples); - m_format = LookupBaseFormat(format); - m_view_type = view_type; - m_layout = VK_IMAGE_LAYOUT_UNDEFINED; - m_image = image; - m_allocation = allocation; - m_view = view; - return true; -} - -bool Vulkan::Texture::Adopt(VkImage existing_image, VkImageViewType view_type, u32 width, u32 height, u32 levels, - u32 layers, VkFormat format, VkSampleCountFlagBits samples, VkImageLayout layout, - const VkComponentMapping* swizzle /* = nullptr */) -{ - // Only need to create the image view, this is mainly for swap chains. - const VkImageViewCreateInfo view_info = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - existing_image, - view_type, - format, - swizzle ? *swizzle : s_identity_swizzle, - {Util::IsDepthFormat(format) ? - static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : - static_cast(VK_IMAGE_ASPECT_COLOR_BIT), - 0, levels, 0, layers}}; - - // Memory is managed by the owner of the image. 
- VkImageView view = VK_NULL_HANDLE; - VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); - return false; - } - - if (IsValid()) - Destroy(true); - - m_width = static_cast(width); - m_height = static_cast(height); - m_levels = static_cast(levels); - m_layers = static_cast(layers); - m_format = LookupBaseFormat(format); - m_samples = static_cast(samples); - m_view_type = view_type; - m_layout = layout; - m_image = existing_image; - m_view = view; - return true; -} - -void Vulkan::Texture::Destroy(bool defer /* = true */) -{ - if (m_view != VK_NULL_HANDLE) - { - if (defer) - g_vulkan_context->DeferImageViewDestruction(m_view); - else - vkDestroyImageView(g_vulkan_context->GetDevice(), m_view, nullptr); - m_view = VK_NULL_HANDLE; - } - - // If we don't have device memory allocated, the image is not owned by us (e.g. swapchain) - if (m_allocation != VK_NULL_HANDLE) - { - Assert(m_image != VK_NULL_HANDLE); - if (defer) - g_vulkan_context->DeferImageDestruction(m_image, m_allocation); - else - vmaDestroyImage(g_vulkan_context->GetAllocator(), m_image, m_allocation); - m_image = VK_NULL_HANDLE; - m_allocation = VK_NULL_HANDLE; - } - - ClearBaseProperties(); - m_samples = VK_SAMPLE_COUNT_1_BIT; - m_view_type = VK_IMAGE_VIEW_TYPE_2D; - m_layout = VK_IMAGE_LAYOUT_UNDEFINED; -} - -void Vulkan::Texture::OverrideImageLayout(VkImageLayout new_layout) -{ - m_layout = new_layout; -} - -void Vulkan::Texture::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) -{ - if (m_layout == new_layout) - return; - const Vulkan::Util::DebugScope debugScope(command_buffer, "Texture::TransitionToLayout: %s", - Vulkan::Util::VkImageLayoutToString(new_layout)); - - TransitionSubresourcesToLayout(command_buffer, 0, m_levels, 0, m_layers, m_layout, new_layout); - - m_layout = new_layout; -} - -void 
Vulkan::Texture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_level, u32 num_levels, - u32 start_layer, u32 num_layers, VkImageLayout old_layout, - VkImageLayout new_layout) -{ - const Vulkan::Util::DebugScope debugScope( - command_buffer, "Texture::TransitionSubresourcesToLayout: Lvl:[%u,%u) Lyr:[%u,%u) %s -> %s", start_level, - start_level + num_levels, start_layer, start_layer + num_layers, Vulkan::Util::VkImageLayoutToString(old_layout), - Vulkan::Util::VkImageLayoutToString(new_layout)); - - VkImageMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkAccessFlags srcAccessMask - 0, // VkAccessFlags dstAccessMask - old_layout, // VkImageLayout oldLayout - new_layout, // VkImageLayout newLayout - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - m_image, // VkImage image - {static_cast(IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT), - start_level, num_levels, start_layer, num_layers} // VkImageSubresourceRange subresourceRange - }; - - // srcStageMask -> Stages that must complete before the barrier - // dstStageMask -> Stages that must wait for after the barrier before beginning - VkPipelineStageFlags srcStageMask, dstStageMask; - switch (old_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - // Layout undefined therefore contents undefined, and we don't care what happens to it. - barrier.srcAccessMask = 0; - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - // Image has been pre-initialized by the host, so ensure all writes have completed. - barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - // Image was being used as a color attachment, so ensure all writes have completed. 
- barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - // Image was being used as a depthstencil attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - // Image was being used as a shader resource, make sure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - // Image was being used as a copy source, ensure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - // Image was being used as a copy destination, ensure all writes have finished. 
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - default: - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - } - - switch (new_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - barrier.dstAccessMask = 0; - dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - default: - dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - break; - } - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, &barrier); -} - -VkFramebuffer Vulkan::Texture::CreateFramebuffer(VkRenderPass render_pass) -{ - const VkFramebufferCreateInfo ci = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, nullptr, 0u, render_pass, 1, &m_view, m_width, m_height, m_layers}; - VkFramebuffer fb = VK_NULL_HANDLE; - VkResult res = 
vkCreateFramebuffer(g_vulkan_context->GetDevice(), &ci, nullptr, &fb); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer() failed: "); - return VK_NULL_HANDLE; - } - - return fb; -} - -void Vulkan::Texture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 level, u32 layer, u32 x, u32 y, u32 width, - u32 height, VkBuffer buffer, u32 buffer_offset, u32 row_length) -{ - // If we're previously undefined, don't leave any images in this layout. - const VkImageLayout old_layout = m_layout; - if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) - TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - else if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) - TransitionSubresourcesToLayout(cmdbuf, level, 1, layer, 1, old_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - const VkBufferImageCopy bic = {static_cast(buffer_offset), - row_length, - height, - {VK_IMAGE_ASPECT_COLOR_BIT, level, layer, 1u}, - {static_cast(x), static_cast(y), 0}, - {width, height, 1u}}; - - vkCmdCopyBufferToImage(cmdbuf, buffer, m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bic); - - if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && old_layout != VK_IMAGE_LAYOUT_UNDEFINED) - TransitionSubresourcesToLayout(cmdbuf, level, 1, layer, 1, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, old_layout); -} - -u32 Vulkan::Texture::CalcUpdatePitch(u32 width) const -{ - return Common::AlignUp(width * GetPixelSize(), g_vulkan_context->GetBufferCopyRowPitchAlignment()); -} - -u32 Vulkan::Texture::CalcUpdateRowLength(u32 pitch) const -{ - return pitch / GetPixelSize(); -} - -bool Vulkan::Texture::BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) -{ - const u32 pitch = CalcUpdatePitch(width); - const u32 required_size = pitch * height; - StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); - if (required_size > buffer.GetCurrentSize()) - return false; - - // TODO: allocate temporary buffer if this fails... 
- if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) - { - g_vulkan_context->ExecuteCommandBuffer(false); - if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) - return false; - } - - *out_buffer = buffer.GetCurrentHostPointer(); - *out_pitch = pitch; - return true; -} - -void Vulkan::Texture::EndUpdate(u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer) -{ - const u32 pitch = CalcUpdatePitch(width); - const u32 required_size = pitch * height; - - StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); - const u32 buffer_offset = buffer.GetCurrentOffset(); - buffer.CommitMemory(required_size); - - UpdateFromBuffer(g_vulkan_context->GetCurrentCommandBuffer(), level, layer, x, y, width, height, buffer.GetBuffer(), - buffer_offset, CalcUpdateRowLength(pitch)); -} - -bool Vulkan::Texture::Update(u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer, const void* data, - u32 data_pitch) -{ - const u32 pitch = CalcUpdatePitch(width); - const u32 row_length = CalcUpdateRowLength(pitch); - const u32 required_size = pitch * height; - StreamBuffer& sbuffer = g_vulkan_context->GetTextureUploadBuffer(); - - // If the texture is larger than half our streaming buffer size, use a separate buffer. - // Otherwise allocation will either fail, or require lots of cmdbuffer submissions. - if (required_size > (g_vulkan_context->GetTextureUploadBuffer().GetCurrentSize() / 2)) - { - const u32 size = data_pitch * height; - const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - nullptr, - 0, - static_cast(size), - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_SHARING_MODE_EXCLUSIVE, - 0, - nullptr}; - - // Don't worry about setting the coherent bit for this upload, the main reason we had - // that set in StreamBuffer was for MoltenVK, which would upload the whole buffer on - // smaller uploads, but we're writing to the whole thing anyway. 
- VmaAllocationCreateInfo aci = {}; - aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; - - VmaAllocationInfo ai; - VkBuffer buffer; - VmaAllocation allocation; - VkResult res = vmaCreateBuffer(g_vulkan_context->GetAllocator(), &bci, &aci, &buffer, &allocation, &ai); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); - return false; - } - - // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy. - g_vulkan_context->DeferBufferDestruction(buffer, allocation); - - StringUtil::StrideMemCpy(ai.pMappedData, pitch, data, data_pitch, std::min(data_pitch, pitch), height); - vmaFlushAllocation(g_vulkan_context->GetAllocator(), allocation, 0, size); - - UpdateFromBuffer(g_vulkan_context->GetCurrentCommandBuffer(), level, layer, x, y, width, height, buffer, 0, - row_length); - return true; - } - else - { - if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) - { - g_vulkan_context->ExecuteCommandBuffer(false); - if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) - { - Log_ErrorPrintf("Failed to reserve texture upload memory (%u bytes).", required_size); - return false; - } - } - - const u32 buffer_offset = sbuffer.GetCurrentOffset(); - StringUtil::StrideMemCpy(sbuffer.GetCurrentHostPointer(), pitch, data, data_pitch, std::min(data_pitch, pitch), - height); - sbuffer.CommitMemory(required_size); - - UpdateFromBuffer(g_vulkan_context->GetCurrentCommandBuffer(), level, layer, x, y, width, height, - sbuffer.GetBuffer(), buffer_offset, row_length); - return true; - } -} diff --git a/src/common/vulkan/texture.h b/src/common/vulkan/texture.h deleted file mode 100644 index 644e1d87d..000000000 --- a/src/common/vulkan/texture.h +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma 
once -#include "../gpu_texture.h" -#include "loader.h" -#include -#include - -namespace Vulkan { - -class Texture final : public GPUTexture -{ -public: - Texture(); - Texture(Texture&& move); - Texture(const Texture&) = delete; - ~Texture(); - - Texture& operator=(Texture&& move); - Texture& operator=(const Texture&) = delete; - - static VkFormat GetVkFormat(Format format); - static Format LookupBaseFormat(VkFormat vformat); - - bool IsValid() const override; - - /// An image is considered owned/managed if we control the memory. - ALWAYS_INLINE bool IsOwned() const { return (m_allocation != VK_NULL_HANDLE); } - - ALWAYS_INLINE u32 GetWidth() const { return m_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_height; } - ALWAYS_INLINE u32 GetLevels() const { return m_levels; } - ALWAYS_INLINE u32 GetLayers() const { return m_layers; } - - ALWAYS_INLINE VkFormat GetVkFormat() const { return GetVkFormat(m_format); } - ALWAYS_INLINE VkSampleCountFlagBits GetVkSamples() const { return static_cast(m_samples); } - ALWAYS_INLINE VkImageLayout GetLayout() const { return m_layout; } - ALWAYS_INLINE VkImageViewType GetViewType() const { return m_view_type; } - ALWAYS_INLINE VkImage GetImage() const { return m_image; } - ALWAYS_INLINE VmaAllocation GetAllocation() const { return m_allocation; } - ALWAYS_INLINE VkImageView GetView() const { return m_view; } - - bool Create(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, VkSampleCountFlagBits samples, - VkImageViewType view_type, VkImageTiling tiling, VkImageUsageFlags usage, bool dedicated_memory = false, - const VkComponentMapping* swizzle = nullptr); - - bool Adopt(VkImage existing_image, VkImageViewType view_type, u32 width, u32 height, u32 levels, u32 layers, - VkFormat format, VkSampleCountFlagBits samples, VkImageLayout layout, - const VkComponentMapping* swizzle = nullptr); - - void Destroy(bool defer = true); - - // Used when the render pass is changing the image layout, or to force it to - // 
VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is - // irrelevant and will not be loaded. - void OverrideImageLayout(VkImageLayout new_layout); - - void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout); - void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_level, u32 num_levels, u32 start_layer, - u32 num_layers, VkImageLayout old_layout, VkImageLayout new_layout); - - VkFramebuffer CreateFramebuffer(VkRenderPass render_pass); - - void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 level, u32 layer, u32 x, u32 y, u32 width, u32 height, - VkBuffer buffer, u32 buffer_offset, u32 row_length); - - u32 CalcUpdatePitch(u32 width) const; - u32 CalcUpdateRowLength(u32 pitch) const; - bool BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch); - void EndUpdate(u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer); - bool Update(u32 x, u32 y, u32 width, u32 height, u32 level, u32 layer, const void* data, u32 data_pitch); - -private: - VkImageViewType m_view_type = VK_IMAGE_VIEW_TYPE_2D; - VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImage m_image = VK_NULL_HANDLE; - VmaAllocation m_allocation = VK_NULL_HANDLE; - VkImageView m_view = VK_NULL_HANDLE; -}; - -} // namespace Vulkan diff --git a/src/common/vulkan/util.cpp b/src/common/vulkan/util.cpp deleted file mode 100644 index 75690fb50..000000000 --- a/src/common/vulkan/util.cpp +++ /dev/null @@ -1,602 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "util.h" -#include "../assert.h" -#include "../log.h" -#include "../string_util.h" -#include "context.h" -#include "shader_compiler.h" - -#include - -bool Vulkan::Util::IsDepthFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D16_UNORM_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - 
return true; - default: - return false; - } -} - -bool Vulkan::Util::IsCompressedFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return true; - - default: - return false; - } -} - -VkFormat Vulkan::Util::GetLinearFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_R8_SRGB: - return VK_FORMAT_R8_UNORM; - case VK_FORMAT_R8G8_SRGB: - return VK_FORMAT_R8G8_UNORM; - case VK_FORMAT_R8G8B8_SRGB: - return VK_FORMAT_R8G8B8_UNORM; - case VK_FORMAT_R8G8B8A8_SRGB: - return VK_FORMAT_R8G8B8A8_UNORM; - case VK_FORMAT_B8G8R8_SRGB: - return VK_FORMAT_B8G8R8_UNORM; - case VK_FORMAT_B8G8R8A8_SRGB: - return VK_FORMAT_B8G8R8A8_UNORM; - default: - return format; - } -} - -u32 Vulkan::Util::GetTexelSize(VkFormat format) -{ - // Only contains pixel formats we use. - switch (format) - { - case VK_FORMAT_R32_SFLOAT: - return 4; - - case VK_FORMAT_D32_SFLOAT: - return 4; - - case VK_FORMAT_R8G8B8A8_UNORM: - return 4; - - case VK_FORMAT_B8G8R8A8_UNORM: - return 4; - - case VK_FORMAT_R5G5B5A1_UNORM_PACK16: - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - case VK_FORMAT_B5G6R5_UNORM_PACK16: - return 2; - - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - return 8; - - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return 16; - - default: - Panic("Unhandled pixel format"); - return 1; - } -} - -u32 Vulkan::Util::GetBlockSize(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return 4; - - default: - return 1; - } -} - -VkRect2D Vulkan::Util::ClampRect2D(const VkRect2D& rect, u32 width, u32 height) -{ - VkRect2D out; - out.offset.x = std::clamp(rect.offset.x, 0, static_cast(width - 1)); - out.offset.y = std::clamp(rect.offset.y, 0, 
static_cast(height - 1)); - out.extent.width = std::min(rect.extent.width, width - static_cast(rect.offset.x)); - out.extent.height = std::min(rect.extent.height, height - static_cast(rect.offset.y)); - return out; -} - -VkBlendFactor Vulkan::Util::GetAlphaBlendFactor(VkBlendFactor factor) -{ - switch (factor) - { - case VK_BLEND_FACTOR_SRC_COLOR: - return VK_BLEND_FACTOR_SRC_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - case VK_BLEND_FACTOR_DST_COLOR: - return VK_BLEND_FACTOR_DST_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; - default: - return factor; - } -} - -void Vulkan::Util::SetViewport(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) -{ - const VkViewport vp{static_cast(x), - static_cast(y), - static_cast(width), - static_cast(height), - min_depth, - max_depth}; - vkCmdSetViewport(command_buffer, 0, 1, &vp); -} - -void Vulkan::Util::SetScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height) -{ - const VkRect2D scissor{{x, y}, {static_cast(width), static_cast(height)}}; - vkCmdSetScissor(command_buffer, 0, 1, &scissor); -} - -void Vulkan::Util::SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth /* = 0.0f */, float max_depth /* = 1.0f */) -{ - const VkViewport vp{static_cast(x), - static_cast(y), - static_cast(width), - static_cast(height), - min_depth, - max_depth}; - const VkRect2D scissor{{x, y}, {static_cast(width), static_cast(height)}}; - vkCmdSetViewport(command_buffer, 0, 1, &vp); - vkCmdSetScissor(command_buffer, 0, 1, &scissor); -} - -void Vulkan::Util::SetViewportAndClampScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) -{ - const VkViewport vp{static_cast(x), - static_cast(y), - static_cast(width), - 
static_cast(height), - min_depth, - max_depth}; - vkCmdSetViewport(command_buffer, 0, 1, &vp); - - const int cx = std::max(x, 0); - const int cy = std::max(y, 0); - const int cwidth = width - (cx - x); - const int cheight = height - (cy - y); - const VkRect2D scissor{{cx, cy}, {static_cast(cwidth), static_cast(cheight)}}; - vkCmdSetScissor(command_buffer, 0, 1, &scissor); -} - -void Vulkan::Util::SafeDestroyFramebuffer(VkFramebuffer& fb) -{ - if (fb != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), fb, nullptr); - fb = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroyShaderModule(VkShaderModule& sm) -{ - if (sm != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), sm, nullptr); - sm = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroyPipeline(VkPipeline& p) -{ - if (p != VK_NULL_HANDLE) - { - vkDestroyPipeline(g_vulkan_context->GetDevice(), p, nullptr); - p = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroyPipelineLayout(VkPipelineLayout& pl) -{ - if (pl != VK_NULL_HANDLE) - { - vkDestroyPipelineLayout(g_vulkan_context->GetDevice(), pl, nullptr); - pl = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroyDescriptorSetLayout(VkDescriptorSetLayout& dsl) -{ - if (dsl != VK_NULL_HANDLE) - { - vkDestroyDescriptorSetLayout(g_vulkan_context->GetDevice(), dsl, nullptr); - dsl = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroyBufferView(VkBufferView& bv) -{ - if (bv != VK_NULL_HANDLE) - { - vkDestroyBufferView(g_vulkan_context->GetDevice(), bv, nullptr); - bv = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroyImageView(VkImageView& iv) -{ - if (iv != VK_NULL_HANDLE) - { - vkDestroyImageView(g_vulkan_context->GetDevice(), iv, nullptr); - iv = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeDestroySampler(VkSampler& samp) -{ - if (samp != VK_NULL_HANDLE) - { - vkDestroySampler(g_vulkan_context->GetDevice(), samp, nullptr); - samp = VK_NULL_HANDLE; - } -} - -void 
Vulkan::Util::SafeDestroySemaphore(VkSemaphore& sem) -{ - if (sem != VK_NULL_HANDLE) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), sem, nullptr); - sem = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::SafeFreeGlobalDescriptorSet(VkDescriptorSet& ds) -{ - if (ds != VK_NULL_HANDLE) - { - g_vulkan_context->FreeGlobalDescriptorSet(ds); - ds = VK_NULL_HANDLE; - } -} - -void Vulkan::Util::BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, VkAccessFlags src_access_mask, - VkAccessFlags dst_access_mask, VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask) -{ - VkBufferMemoryBarrier buffer_info = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - src_access_mask, // VkAccessFlags srcAccessMask - dst_access_mask, // VkAccessFlags dstAccessMask - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - buffer, // VkBuffer buffer - offset, // VkDeviceSize offset - size // VkDeviceSize size - }; - - vkCmdPipelineBarrier(command_buffer, src_stage_mask, dst_stage_mask, 0, 0, nullptr, 1, &buffer_info, 0, nullptr); -} - -void Vulkan::Util::AddPointerToChain(void* head, const void* ptr) -{ - VkBaseInStructure* last_st = static_cast(head); - while (last_st->pNext) - { - if (last_st->pNext == ptr) - return; - - last_st = const_cast(last_st->pNext); - } - - last_st->pNext = static_cast(ptr); -} - -VkShaderModule Vulkan::Util::CreateShaderModule(const u32* spv, size_t spv_word_count) -{ - VkShaderModuleCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - info.codeSize = spv_word_count * sizeof(u32); - info.pCode = spv; - - VkShaderModule module; - VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &info, nullptr, &module); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateShaderModule failed: "); - return VK_NULL_HANDLE; - } - - 
return module; -} - -VkShaderModule Vulkan::Util::CompileAndCreateVertexShader(std::string_view source_code) -{ - std::optional code = ShaderCompiler::CompileVertexShader(source_code); - if (!code) - return VK_NULL_HANDLE; - - return CreateShaderModule(code->data(), code->size()); -} - -VkShaderModule Vulkan::Util::CompileAndCreateGeometryShader(std::string_view source_code) -{ - std::optional code = ShaderCompiler::CompileGeometryShader(source_code); - if (!code) - return VK_NULL_HANDLE; - - return CreateShaderModule(code->data(), code->size()); -} - -VkShaderModule Vulkan::Util::CompileAndCreateFragmentShader(std::string_view source_code) -{ - std::optional code = ShaderCompiler::CompileFragmentShader(source_code); - if (!code) - return VK_NULL_HANDLE; - - return CreateShaderModule(code->data(), code->size()); -} - -VkShaderModule Vulkan::Util::CompileAndCreateComputeShader(std::string_view source_code) -{ - std::optional code = ShaderCompiler::CompileComputeShader(source_code); - if (!code) - return VK_NULL_HANDLE; - - return CreateShaderModule(code->data(), code->size()); -} - -const char* Vulkan::Util::VkResultToString(VkResult res) -{ - switch (res) - { - case VK_SUCCESS: - return "VK_SUCCESS"; - - case VK_NOT_READY: - return "VK_NOT_READY"; - - case VK_TIMEOUT: - return "VK_TIMEOUT"; - - case VK_EVENT_SET: - return "VK_EVENT_SET"; - - case VK_EVENT_RESET: - return "VK_EVENT_RESET"; - - case VK_INCOMPLETE: - return "VK_INCOMPLETE"; - - case VK_ERROR_OUT_OF_HOST_MEMORY: - return "VK_ERROR_OUT_OF_HOST_MEMORY"; - - case VK_ERROR_OUT_OF_DEVICE_MEMORY: - return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; - - case VK_ERROR_INITIALIZATION_FAILED: - return "VK_ERROR_INITIALIZATION_FAILED"; - - case VK_ERROR_DEVICE_LOST: - return "VK_ERROR_DEVICE_LOST"; - - case VK_ERROR_MEMORY_MAP_FAILED: - return "VK_ERROR_MEMORY_MAP_FAILED"; - - case VK_ERROR_LAYER_NOT_PRESENT: - return "VK_ERROR_LAYER_NOT_PRESENT"; - - case VK_ERROR_EXTENSION_NOT_PRESENT: - return 
"VK_ERROR_EXTENSION_NOT_PRESENT"; - - case VK_ERROR_FEATURE_NOT_PRESENT: - return "VK_ERROR_FEATURE_NOT_PRESENT"; - - case VK_ERROR_INCOMPATIBLE_DRIVER: - return "VK_ERROR_INCOMPATIBLE_DRIVER"; - - case VK_ERROR_TOO_MANY_OBJECTS: - return "VK_ERROR_TOO_MANY_OBJECTS"; - - case VK_ERROR_FORMAT_NOT_SUPPORTED: - return "VK_ERROR_FORMAT_NOT_SUPPORTED"; - - case VK_ERROR_SURFACE_LOST_KHR: - return "VK_ERROR_SURFACE_LOST_KHR"; - - case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: - return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"; - - case VK_SUBOPTIMAL_KHR: - return "VK_SUBOPTIMAL_KHR"; - - case VK_ERROR_OUT_OF_DATE_KHR: - return "VK_ERROR_OUT_OF_DATE_KHR"; - - case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: - return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"; - - case VK_ERROR_VALIDATION_FAILED_EXT: - return "VK_ERROR_VALIDATION_FAILED_EXT"; - - case VK_ERROR_INVALID_SHADER_NV: - return "VK_ERROR_INVALID_SHADER_NV"; - - default: - return "UNKNOWN_VK_RESULT"; - } -} - -const char* Vulkan::Util::VkImageLayoutToString(VkImageLayout layout) -{ - switch (layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - return "VK_IMAGE_LAYOUT_UNDEFINED"; - - case VK_IMAGE_LAYOUT_GENERAL: - return "VK_IMAGE_LAYOUT_GENERAL"; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return "VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL"; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return "VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL"; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: - return "VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL"; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return "VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL"; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - return "VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL"; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - return "VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL"; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - return "VK_IMAGE_LAYOUT_PREINITIALIZED"; - - case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: - return 
"VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL"; - - case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: - return "VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL"; - - case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: - return "VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL"; - - case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: - return "VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL"; - - case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL: - return "VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL"; - - case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL: - return "VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL"; - - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - return "VK_IMAGE_LAYOUT_PRESENT_SRC_KHR"; - - case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: - return "VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR"; - - case VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV: - return "VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV"; - - case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT: - return "VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT"; - - default: - return "UNKNOWN_VK_RESULT"; - } -} - -void Vulkan::Util::LogVulkanResult(int level, const char* func_name, VkResult res, const char* msg, ...) 
-{ - std::va_list ap; - va_start(ap, msg); - std::string real_msg = StringUtil::StdStringFromFormatV(msg, ap); - va_end(ap); - - Log::Writef("Vulkan", func_name, static_cast(level), "(%s) %s (%d: %s)", func_name, real_msg.c_str(), - static_cast(res), VkResultToString(res)); -} - -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - -u8 Vulkan::Util::DebugScope::depth = 0; -u8 Vulkan::Util::DebugScope::depth = 0; - -static std::array Palette(float phase, const std::array& a, const std::array& b, - const std::array& c, const std::array& d) -{ - std::array result; - result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0])); - result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1])); - result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2])); - result[3] = 1.0f; - return result; -} - -Vulkan::Util::DebugScope::DebugScope(VkCommandBuffer context, const char* format, ...) - : command_buffer(context) -{ - if (command_buffer) - { - std::va_list ap; - - SmallString str; - va_start(ap, format); - str.FormatVA(format, ap); - va_end(ap); - - ++depth; - const float depth_phase = depth / static_cast(max_depth); - BeginDebugScope( - command_buffer, str, - Palette(depth_phase, {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f})); - } -} - -Vulkan::Util::DebugScope::~DebugScope() -{ - if (command_buffer) - { - --depth; - EndDebugScope(command_buffer); - } -} - -Vulkan::Util::DebugScope::DebugScope(VkQueue context, const char* format, ...) 
: queue(context) -{ - if (queue) - { - std::va_list ap; - va_start(ap, format); - - SmallString str; - str.FormatVA(format, ap); - va_end(ap); - - const float depth_phase = depth / static_cast(max_depth); - BeginDebugScope( - queue, str, - Palette(depth_phase, {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, {2.0f, 1.0f, 0.0f}, {0.5f, 0.20f, 0.25f})); - ++depth; - } -} - -Vulkan::Util::DebugScope::~DebugScope() -{ - if (queue) - { - --depth; - EndDebugScope(queue); - } -} - -#endif diff --git a/src/common/vulkan/util.h b/src/common/vulkan/util.h deleted file mode 100644 index 8e06be421..000000000 --- a/src/common/vulkan/util.h +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#include "../string.h" -#include "../types.h" -#include "context.h" -#include "loader.h" -#include -#include -#include -#include -namespace Vulkan::Util { - -inline constexpr u32 MakeRGBA8Color(float r, float g, float b, float a) -{ - return (static_cast(std::clamp(static_cast(r * 255.0f), 0, 255)) << 0) | - (static_cast(std::clamp(static_cast(g * 255.0f), 0, 255)) << 8) | - (static_cast(std::clamp(static_cast(b * 255.0f), 0, 255)) << 16) | - (static_cast(std::clamp(static_cast(a * 255.0f), 0, 255)) << 24); -} - -bool IsDepthFormat(VkFormat format); -bool IsCompressedFormat(VkFormat format); -VkFormat GetLinearFormat(VkFormat format); -u32 GetTexelSize(VkFormat format); -u32 GetBlockSize(VkFormat format); - -// Clamps a VkRect2D to the specified dimensions. 
-VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height); - -// Map {SRC,DST}_COLOR to {SRC,DST}_ALPHA -VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor); - -// Safe destroy helpers -void SafeDestroyFramebuffer(VkFramebuffer& fb); -void SafeDestroyShaderModule(VkShaderModule& sm); -void SafeDestroyPipeline(VkPipeline& p); -void SafeDestroyPipelineLayout(VkPipelineLayout& pl); -void SafeDestroyDescriptorSetLayout(VkDescriptorSetLayout& dsl); -void SafeDestroyBufferView(VkBufferView& bv); -void SafeDestroyImageView(VkImageView& iv); -void SafeDestroySampler(VkSampler& samp); -void SafeDestroySemaphore(VkSemaphore& sem); -void SafeFreeGlobalDescriptorSet(VkDescriptorSet& ds); - -void SetViewport(VkCommandBuffer command_buffer, int x, int y, int width, int height, float min_depth = 0.0f, - float max_depth = 1.0f); -void SetScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height); - -// Combines viewport and scissor updates -void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, float min_depth = 0.0f, - float max_depth = 1.0f); -void SetViewportAndClampScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth = 0.0f, float max_depth = 1.0f); - -// Wrapper for creating an barrier on a buffer -void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, VkAccessFlags src_access_mask, - VkAccessFlags dst_access_mask, VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask); - -// Adds a structure to a chain. -void AddPointerToChain(void* head, const void* ptr); - -// Create a shader module from the specified SPIR-V. -VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count); - -// Compile a vertex shader and create a shader module, discarding the intermediate SPIR-V. 
-VkShaderModule CompileAndCreateVertexShader(std::string_view source_code); - -// Compile a geometry shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateGeometryShader(std::string_view source_code); - -// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateFragmentShader(std::string_view source_code); - -// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateComputeShader(std::string_view source_code); - -const char* VkResultToString(VkResult res); -const char* VkImageLayoutToString(VkImageLayout layout); -void LogVulkanResult(int level, const char* func_name, VkResult res, const char* msg, ...) printflike(4, 5); - -#define LOG_VULKAN_ERROR(res, ...) ::Vulkan::Util::LogVulkanResult(1, __func__, res, __VA_ARGS__) - -#if defined(_DEBUG) - -// We can't use the templates below because they're all the same type on 32-bit. 
-#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || \ - defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) -#define ENABLE_VULKAN_DEBUG_OBJECTS 1 -#endif - -#endif - -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - -// Provides a compile-time mapping between a Vulkan-type into its matching VkObjectType -template -struct VkObjectTypeMap; - -// clang-format off -template<> struct VkObjectTypeMap { using type = VkInstance ; static constexpr VkObjectType value = VK_OBJECT_TYPE_INSTANCE; }; -template<> struct VkObjectTypeMap { using type = VkPhysicalDevice ; static constexpr VkObjectType value = VK_OBJECT_TYPE_PHYSICAL_DEVICE; }; -template<> struct VkObjectTypeMap { using type = VkDevice ; static constexpr VkObjectType value = VK_OBJECT_TYPE_DEVICE; }; -template<> struct VkObjectTypeMap { using type = VkQueue ; static constexpr VkObjectType value = VK_OBJECT_TYPE_QUEUE; }; -template<> struct VkObjectTypeMap { using type = VkSemaphore ; static constexpr VkObjectType value = VK_OBJECT_TYPE_SEMAPHORE; }; -template<> struct VkObjectTypeMap { using type = VkCommandBuffer ; static constexpr VkObjectType value = VK_OBJECT_TYPE_COMMAND_BUFFER; }; -template<> struct VkObjectTypeMap { using type = VkFence ; static constexpr VkObjectType value = VK_OBJECT_TYPE_FENCE; }; -template<> struct VkObjectTypeMap { using type = VkDeviceMemory ; static constexpr VkObjectType value = VK_OBJECT_TYPE_DEVICE_MEMORY; }; -template<> struct VkObjectTypeMap { using type = VkBuffer ; static constexpr VkObjectType value = VK_OBJECT_TYPE_BUFFER; }; -template<> struct VkObjectTypeMap { using type = VkImage ; static constexpr VkObjectType value = VK_OBJECT_TYPE_IMAGE; }; -template<> struct VkObjectTypeMap { using type = VkEvent ; static constexpr VkObjectType value = VK_OBJECT_TYPE_EVENT; }; -template<> struct VkObjectTypeMap { using type = VkQueryPool ; static constexpr VkObjectType value = VK_OBJECT_TYPE_QUERY_POOL; }; 
-template<> struct VkObjectTypeMap { using type = VkBufferView ; static constexpr VkObjectType value = VK_OBJECT_TYPE_BUFFER_VIEW; }; -template<> struct VkObjectTypeMap { using type = VkImageView ; static constexpr VkObjectType value = VK_OBJECT_TYPE_IMAGE_VIEW; }; -template<> struct VkObjectTypeMap { using type = VkShaderModule ; static constexpr VkObjectType value = VK_OBJECT_TYPE_SHADER_MODULE; }; -template<> struct VkObjectTypeMap { using type = VkPipelineCache ; static constexpr VkObjectType value = VK_OBJECT_TYPE_PIPELINE_CACHE; }; -template<> struct VkObjectTypeMap { using type = VkPipelineLayout ; static constexpr VkObjectType value = VK_OBJECT_TYPE_PIPELINE_LAYOUT; }; -template<> struct VkObjectTypeMap { using type = VkRenderPass ; static constexpr VkObjectType value = VK_OBJECT_TYPE_RENDER_PASS; }; -template<> struct VkObjectTypeMap { using type = VkPipeline ; static constexpr VkObjectType value = VK_OBJECT_TYPE_PIPELINE; }; -template<> struct VkObjectTypeMap { using type = VkDescriptorSetLayout ; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT; }; -template<> struct VkObjectTypeMap { using type = VkSampler ; static constexpr VkObjectType value = VK_OBJECT_TYPE_SAMPLER; }; -template<> struct VkObjectTypeMap { using type = VkDescriptorPool ; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_POOL; }; -template<> struct VkObjectTypeMap { using type = VkDescriptorSet ; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_SET; }; -template<> struct VkObjectTypeMap { using type = VkFramebuffer ; static constexpr VkObjectType value = VK_OBJECT_TYPE_FRAMEBUFFER; }; -template<> struct VkObjectTypeMap { using type = VkCommandPool ; static constexpr VkObjectType value = VK_OBJECT_TYPE_COMMAND_POOL; }; -template<> struct VkObjectTypeMap { using type = VkDescriptorUpdateTemplate; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE; }; -template<> struct VkObjectTypeMap { using type = 
VkSurfaceKHR ; static constexpr VkObjectType value = VK_OBJECT_TYPE_SURFACE_KHR; }; -template<> struct VkObjectTypeMap { using type = VkSwapchainKHR ; static constexpr VkObjectType value = VK_OBJECT_TYPE_SWAPCHAIN_KHR; }; -template<> struct VkObjectTypeMap { using type = VkDebugUtilsMessengerEXT ; static constexpr VkObjectType value = VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT; }; -// clang-format on - -#endif - -inline void SetObjectName(VkDevice device, void* object_handle, VkObjectType object_type, const char* format, ...) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if (!vkSetDebugUtilsObjectNameEXT) - { - return; - } - std::va_list ap; - - SmallString str; - va_start(ap, format); - str.FormatVA(format, ap); - va_end(ap); - - const VkDebugUtilsObjectNameInfoEXT nameInfo{VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr, object_type, - reinterpret_cast(object_handle), str}; - vkSetDebugUtilsObjectNameEXT(device, &nameInfo); -#endif -} - -template -inline void SetObjectName(VkDevice device, T object_handle, const char* format, ...) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - std::va_list ap; - va_start(ap, format); - SetObjectName(device, reinterpret_cast((typename VkObjectTypeMap::type)object_handle), - VkObjectTypeMap::value, format, ap); - va_end(ap); -#endif -} - -template<> -inline void SetObjectName(VkDevice device, VmaAllocation object_handle, const char* format, ...) 
-{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - std::va_list ap; - SmallString str; - va_start(ap, format); - str.FormatVA(format, ap); - va_end(ap); - - vmaSetAllocationName(g_vulkan_context->GetAllocator(), object_handle, str); -#endif -} - -// Command buffer debug utils -inline void BeginDebugScope(VkCommandBuffer command_buffer, const char* scope_name, - const std::array& scope_color = {0.5, 0.5, 0.5, 1.0}) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if (!vkCmdBeginDebugUtilsLabelEXT) - { - return; - } - const VkDebugUtilsLabelEXT label{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - nullptr, - scope_name, - {scope_color[0], scope_color[1], scope_color[2], scope_color[3]}}; - vkCmdBeginDebugUtilsLabelEXT(command_buffer, &label); -#endif -} - -inline void EndDebugScope(VkCommandBuffer command_buffer) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if (!vkCmdEndDebugUtilsLabelEXT) - { - return; - } - vkCmdEndDebugUtilsLabelEXT(command_buffer); -#endif -} - -inline void InsertDebugLabel(VkCommandBuffer command_buffer, const char* label_name, - const std::array& label_color = {0.5, 0.5, 0.5, 1.0}) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if (!vkCmdInsertDebugUtilsLabelEXT) - { - return; - } - const VkDebugUtilsLabelEXT label{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - nullptr, - label_name, - {label_color[0], label_color[1], label_color[2], label_color[3]}}; - vkCmdInsertDebugUtilsLabelEXT(command_buffer, &label); -#endif -} - -// Queue debug utils -inline void BeginDebugScope(VkQueue queue, const char* scope_name, - const std::array& scope_color = {0.75, 0.75, 0.75, 1.0}) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if (!vkQueueBeginDebugUtilsLabelEXT) - { - return; - } - const VkDebugUtilsLabelEXT label{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - nullptr, - scope_name, - {scope_color[0], scope_color[1], scope_color[2], scope_color[3]}}; - vkQueueBeginDebugUtilsLabelEXT(queue, &label); -#endif -} - -inline void EndDebugScope(VkQueue queue) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if 
(!vkQueueEndDebugUtilsLabelEXT) - { - return; - } - vkQueueEndDebugUtilsLabelEXT(queue); -#endif -} - -inline void InsertDebugLabel(VkQueue queue, const char* label_name, - const std::array& label_color = {0.75, 0.75, 0.75, 1.0}) -{ -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS - if (!vkQueueInsertDebugUtilsLabelEXT) - { - return; - } - const VkDebugUtilsLabelEXT label{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, - nullptr, - label_name, - {label_color[0], label_color[1], label_color[2], label_color[3]}}; - vkQueueInsertDebugUtilsLabelEXT(queue, &label); -#endif -} - -template -class DebugScope -{ -public: - DebugScope(T context, const char* format, ...) {} -}; - -#ifdef ENABLE_VULKAN_DEBUG_OBJECTS -template<> -class DebugScope -{ -public: - DebugScope(VkCommandBuffer context, const char* format, ...); - ~DebugScope(); - -private: - static constexpr u8 max_depth = 8u; - static u8 depth; - VkCommandBuffer command_buffer; -}; - -template<> -class DebugScope -{ -public: - DebugScope(VkQueue context, const char* format, ...); - ~DebugScope(); - -private: - static constexpr u8 max_depth = 8u; - static u8 depth; - VkQueue queue; -}; -#endif - -} // namespace Vulkan::Util diff --git a/src/common/win32_progress_callback.cpp b/src/common/win32_progress_callback.cpp index 264da5334..cd61d22d7 100644 --- a/src/common/win32_progress_callback.cpp +++ b/src/common/win32_progress_callback.cpp @@ -4,7 +4,6 @@ #include "win32_progress_callback.h" #include "common/log.h" #include -#pragma comment(lib, "Comctl32.lib") Log_SetChannel(Win32ProgressCallback); Win32ProgressCallback::Win32ProgressCallback() : BaseProgressCallback() diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 90a704bfd..228c81f0e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -125,35 +125,6 @@ target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") target_link_libraries(core PUBLIC Threads::Threads common util zlib) target_link_libraries(core PRIVATE stb xxhash imgui 
rapidjson) -if(WIN32) - target_sources(core PRIVATE - gpu_hw_d3d12.cpp - gpu_hw_d3d12.h - gpu_hw_d3d11.cpp - gpu_hw_d3d11.h - ) - target_link_libraries(core PRIVATE winmm.lib) -endif() - -if(ENABLE_CUBEB) - target_compile_definitions(core PUBLIC "WITH_CUBEB=1") -endif() - -if(ENABLE_OPENGL) - target_sources(core PRIVATE - gpu_hw_opengl.cpp - gpu_hw_opengl.h - ) - target_link_libraries(core PRIVATE glad) -endif() - -if(ENABLE_VULKAN) - target_sources(core PRIVATE - gpu_hw_vulkan.cpp - gpu_hw_vulkan.h - ) -endif() - if(${CPU_ARCH} STREQUAL "x64") target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak") target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1" "WITH_MMAP_FASTMEM=1") diff --git a/src/core/common_host.cpp b/src/core/common_host.cpp index 98e1eb56f..7d94a0ac4 100644 --- a/src/core/common_host.cpp +++ b/src/core/common_host.cpp @@ -19,6 +19,7 @@ #include "resources.h" #include "save_state_version.h" #include "settings.h" +#include "shader_cache_version.h" #include "spu.h" #include "system.h" #include "texture_replacements.h" @@ -27,7 +28,7 @@ #include "scmversion/scmversion.h" #include "util/audio_stream.h" -#include "util/host_display.h" +#include "util/gpu_device.h" #include "util/imgui_fullscreen.h" #include "util/imgui_manager.h" #include "util/ini_settings_interface.h" @@ -60,21 +61,11 @@ #ifdef _WIN32 #include "common/windows_headers.h" -#include "util/d3d11_host_display.h" -#include "util/d3d12_host_display.h" #include #include #include #endif -#ifdef WITH_OPENGL -#include "util/opengl_host_display.h" -#endif - -#ifdef WITH_VULKAN -#include "util/vulkan_host_display.h" -#endif - Log_SetChannel(CommonHostInterface); namespace CommonHost { @@ -144,52 +135,89 @@ void CommonHost::PumpMessagesOnCPUThread() #endif } -std::unique_ptr Host::CreateDisplayForAPI(RenderAPI api) +bool Host::CreateGPUDevice(RenderAPI api) { - switch (api) + DebugAssert(!g_gpu_device); + + Log_InfoPrintf("Trying to create a %s GPU device...", 
GPUDevice::RenderAPIToString(api)); + g_gpu_device = GPUDevice::CreateDeviceForAPI(api); + + // TODO: FSUI should always use vsync.. + const bool vsync = System::IsValid() ? System::ShouldUseVSync() : g_settings.video_sync_enabled; + if (!g_gpu_device || !g_gpu_device->Create(g_settings.gpu_adapter, + g_settings.gpu_disable_shader_cache ? std::string_view() : + std::string_view(EmuFolders::Cache), + SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device, vsync, + g_settings.gpu_threaded_presentation)) { -#ifdef WITH_VULKAN - case RenderAPI::Vulkan: - return std::make_unique(); -#endif - -#ifdef WITH_OPENGL - case RenderAPI::OpenGL: - case RenderAPI::OpenGLES: - return std::make_unique(); -#endif - -#ifdef _WIN32 - case RenderAPI::D3D12: - return std::make_unique(); - - case RenderAPI::D3D11: - return std::make_unique(); -#endif - - default: -#if defined(_WIN32) && defined(_M_ARM64) - return std::make_unique(); -#elif defined(_WIN32) - return std::make_unique(); -#elif defined(WITH_OPENGL) - return std::make_unique(); -#elif defined(WITH_VULKAN) - return std::make_unique(); -#else - return {}; -#endif + Log_ErrorPrintf("Failed to initialize GPU device."); + if (g_gpu_device) + g_gpu_device->Destroy(); + g_gpu_device.reset(); + return false; + } + + if (!ImGuiManager::Initialize()) + { + Log_ErrorPrintf("Failed to initialize ImGuiManager."); + g_gpu_device->Destroy(); + g_gpu_device.reset(); + return false; } -} -bool CommonHost::CreateHostDisplayResources() -{ return true; } -void CommonHost::ReleaseHostDisplayResources() +void Host::UpdateDisplayWindow() { + if (!g_gpu_device) + return; + + if (!g_gpu_device->UpdateWindow()) + { + Host::ReportErrorAsync("Error", "Failed to change window after update. The log may contain more information."); + return; + } + + ImGuiManager::WindowResized(); + + // If we're paused, re-present the current frame at the new window size. 
+ if (System::IsValid() && System::IsPaused()) + RenderDisplay(false); +} + +void Host::ResizeDisplayWindow(s32 width, s32 height, float scale) +{ + if (!g_gpu_device) + return; + + Log_DevPrintf("Display window resized to %dx%d", width, height); + + g_gpu_device->ResizeWindow(width, height, scale); + ImGuiManager::WindowResized(); + + // If we're paused, re-present the current frame at the new window size. + if (System::IsValid()) + { + if (System::IsPaused()) + RenderDisplay(false); + + System::HostDisplayResized(); + } +} + +void Host::ReleaseGPUDevice() +{ + if (!g_gpu_device) + return; + SaveStateSelectorUI::DestroyTextures(); + FullscreenUI::Shutdown(); + ImGuiManager::Shutdown(); + + Log_InfoPrintf("Destroying %s GPU device...", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); + g_gpu_device->Destroy(); + g_gpu_device.reset(); } #ifndef __ANDROID__ @@ -458,7 +486,10 @@ void Host::DisplayLoadingScreen(const char* message, int progress_min /*= -1*/, } ImGui::End(); - ImGui::SetNextWindowSize(ImVec2(width, (has_progress ? 50.0f : 30.0f) * scale), ImGuiCond_Always); + const float padding_and_rounding = 15.0f * scale; + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, padding_and_rounding); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(padding_and_rounding, padding_and_rounding)); + ImGui::SetNextWindowSize(ImVec2(width, (has_progress ? 
80.0f : 50.0f) * scale), ImGuiCond_Always); ImGui::SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, (io.DisplaySize.y * 0.5f) + (100.0f * scale)), ImGuiCond_Always, ImVec2(0.5f, 0.0f)); if (ImGui::Begin("LoadingScreen", nullptr, @@ -468,7 +499,17 @@ void Host::DisplayLoadingScreen(const char* message, int progress_min /*= -1*/, { if (has_progress) { - ImGui::Text("%s: %d/%d", message, progress_value, progress_max); + ImGui::TextUnformatted(message); + + TinyString buf; + buf.Fmt("{}/{}", progress_value, progress_max); + + const ImVec2 prog_size = ImGui::CalcTextSize(buf.GetCharArray(), buf.GetCharArray() + buf.GetLength()); + ImGui::SameLine(); + ImGui::SetCursorPosX(width - padding_and_rounding - prog_size.x); + ImGui::TextUnformatted(buf.GetCharArray(), buf.GetCharArray() + buf.GetLength()); + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + 5.0f); + ImGui::ProgressBar(static_cast(progress_value) / static_cast(progress_max - progress_min), ImVec2(-1.0f, 0.0f), ""); Log_InfoPrintf("%s: %d/%d", message, progress_value, progress_max); @@ -482,9 +523,10 @@ void Host::DisplayLoadingScreen(const char* message, int progress_min /*= -1*/, } } ImGui::End(); + ImGui::PopStyleVar(2); ImGui::EndFrame(); - g_host_display->Render(false); + g_gpu_device->Render(false); ImGui::NewFrame(); } @@ -628,7 +670,6 @@ static void HotkeyModifyResolutionScale(s32 increment) { g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::InvalidateDisplay(); } @@ -888,7 +929,6 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOO g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXP", g_settings.gpu_pgxp_enable ? 
@@ -957,7 +997,6 @@ DEFINE_HOTKEY("TogglePGXPDepth", TRANSLATE_NOOP("Hotkeys", "Graphics"), g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXPDepth", g_settings.gpu_pgxp_depth_buffer ? @@ -977,7 +1016,6 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_ g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXPCPU", g_settings.gpu_pgxp_cpu ? diff --git a/src/core/common_host.h b/src/core/common_host.h index 2adb82f1b..c6efb1e27 100644 --- a/src/core/common_host.h +++ b/src/core/common_host.h @@ -35,8 +35,6 @@ void OnSystemPaused(); void OnSystemResumed(); void OnGameChanged(const std::string& disc_path, const std::string& game_serial, const std::string& game_name); void PumpMessagesOnCPUThread(); -bool CreateHostDisplayResources(); -void ReleaseHostDisplayResources(); /// Returns the time elapsed in the current play session. 
u64 GetSessionPlayedTime(); diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 6c157979d..74a2b4d7e 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -39,12 +39,7 @@ - - - - true - @@ -52,9 +47,6 @@ - - true - @@ -113,12 +105,7 @@ - - - - true - @@ -128,9 +115,6 @@ - - true - diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 7cc8f8d1f..854d98dea 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -8,7 +8,6 @@ - @@ -23,7 +22,6 @@ - @@ -41,7 +39,6 @@ - @@ -53,7 +50,6 @@ - @@ -74,7 +70,6 @@ - @@ -88,7 +83,6 @@ - @@ -104,7 +98,6 @@ - @@ -117,9 +110,7 @@ - - @@ -132,5 +123,6 @@ + \ No newline at end of file diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index e6a3be796..3b0958c50 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -18,10 +18,10 @@ #include "resources.h" #include "settings.h" #include "system.h" -#include "util/host_display.h" #include "scmversion/scmversion.h" +#include "util/gpu_device.h" #include "util/imgui_fullscreen.h" #include "util/imgui_manager.h" #include "util/ini_settings_interface.h" @@ -404,7 +404,7 @@ static std::unique_ptr s_game_settings_entry; static std::vector> s_game_list_directories_cache; static std::vector s_graphics_adapter_list_cache; static std::vector s_fullscreen_mode_list_cache; -static FrontendCommon::PostProcessingChain s_postprocessing_chain; +static PostProcessingChain s_postprocessing_chain; static std::vector s_hotkey_list_cache; static std::atomic_bool s_settings_changed{false}; static std::atomic_bool s_game_settings_changed{false}; @@ -2394,7 +2394,7 @@ void FullscreenUI::SwitchToGameSettings(const GameList::Entry* entry) void FullscreenUI::PopulateGraphicsAdapterList() { - HostDisplay::AdapterAndModeList ml(g_host_display->GetAdapterAndModeList()); + GPUDevice::AdapterAndModeList ml(g_gpu_device->GetAdapterAndModeList()); s_graphics_adapter_list_cache = 
std::move(ml.adapter_names); s_fullscreen_mode_list_cache = std::move(ml.fullscreen_modes); s_fullscreen_mode_list_cache.insert(s_fullscreen_mode_list_cache.begin(), FSUI_STR("Borderless Fullscreen")); @@ -3653,7 +3653,7 @@ void FullscreenUI::DrawDisplaySettingsPage() adapter.has_value() ? (adapter->empty() ? FSUI_CSTR("Default") : adapter->c_str()) : FSUI_CSTR("Use Global Setting"))) { - HostDisplay::AdapterAndModeList aml(g_host_display->GetAdapterAndModeList()); + GPUDevice::AdapterAndModeList aml(g_gpu_device->GetAdapterAndModeList()); ImGuiFullscreen::ChoiceDialogOptions options; options.reserve(aml.adapter_names.size() + 2); @@ -3698,7 +3698,7 @@ void FullscreenUI::DrawDisplaySettingsPage() fsmode.has_value() ? (fsmode->empty() ? FSUI_CSTR("Borderless Fullscreen") : fsmode->c_str()) : FSUI_CSTR("Use Global Setting"))) { - HostDisplay::AdapterAndModeList aml(g_host_display->GetAdapterAndModeList()); + GPUDevice::AdapterAndModeList aml(g_gpu_device->GetAdapterAndModeList()); ImGuiFullscreen::ChoiceDialogOptions options; options.reserve(aml.fullscreen_modes.size() + 2); @@ -3939,7 +3939,7 @@ void FullscreenUI::SavePostProcessingChain() const std::string config(s_postprocessing_chain.GetConfigString()); bsi->SetStringValue("Display", "PostProcessChain", config.c_str()); if (bsi->GetBoolValue("Display", "PostProcessing", false)) - g_host_display->SetPostProcessingChain(config); + g_gpu_device->SetPostProcessingChain(config); if (IsEditingGameSettings(bsi)) { s_game_settings_interface->Save(); @@ -3975,7 +3975,7 @@ void FullscreenUI::DrawPostProcessingSettingsPage() bsi->GetBoolValue("Display", "PostProcessing", false))) { const std::string chain(bsi->GetStringValue("Display", "PostProcessChain", "")); - g_host_display->SetPostProcessingChain(chain); + g_gpu_device->SetPostProcessingChain(chain); if (chain.empty()) ShowToast(std::string(), FSUI_STR("Post-processing chain is empty.")); else @@ -3987,7 +3987,7 @@ void FullscreenUI::DrawPostProcessingSettingsPage() if 
(MenuButton(FSUI_ICONSTR(ICON_FA_PLUS, "Add Shader"), FSUI_CSTR("Adds a new shader to the chain."))) { ImGuiFullscreen::ChoiceDialogOptions options; - for (std::string& name : FrontendCommon::PostProcessingChain::GetAvailableShaderNames()) + for (std::string& name : PostProcessingChain::GetAvailableShaderNames()) options.emplace_back(std::move(name), false); OpenChoiceDialog(FSUI_ICONSTR(ICON_FA_PLUS, "Add Shader"), false, std::move(options), @@ -4034,8 +4034,8 @@ void FullscreenUI::DrawPostProcessingSettingsPage() for (u32 stage_index = 0; stage_index < s_postprocessing_chain.GetStageCount(); stage_index++) { ImGui::PushID(stage_index); - FrontendCommon::PostProcessingShader& stage = s_postprocessing_chain.GetShaderStage(stage_index); - str.Fmt(FSUI_FSTR("Stage {}: {}"), stage_index + 1, stage.GetName()); + PostProcessingShader* stage = s_postprocessing_chain.GetShaderStage(stage_index); + str.Fmt(FSUI_FSTR("Stage {}: {}"), stage_index + 1, stage->GetName()); MenuHeading(str); if (MenuButton(FSUI_ICONSTR(ICON_FA_TIMES, "Remove From Chain"), FSUI_CSTR("Removes this shader from the chain."))) @@ -4059,11 +4059,11 @@ void FullscreenUI::DrawPostProcessingSettingsPage() postprocessing_action_index = stage_index; } - for (FrontendCommon::PostProcessingShader::Option& opt : stage.GetOptions()) + for (PostProcessingShader::Option& opt : stage->GetOptions()) { switch (opt.type) { - case FrontendCommon::PostProcessingShader::Option::Type::Bool: + case PostProcessingShader::Option::Type::Bool: { bool value = (opt.value[0].int_value != 0); tstr.Fmt(ICON_FA_COGS "{}", opt.ui_name); @@ -4078,7 +4078,7 @@ void FullscreenUI::DrawPostProcessingSettingsPage() } break; - case FrontendCommon::PostProcessingShader::Option::Type::Float: + case PostProcessingShader::Option::Type::Float: { tstr.Fmt(ICON_FA_RULER_VERTICAL "{}##{}", opt.ui_name, opt.name); str.Fmt(FSUI_FSTR("Value: {} | Default: {} | Minimum: {} | Maximum: {}"), opt.value[0].float_value, @@ -4181,7 +4181,7 @@ void 
FullscreenUI::DrawPostProcessingSettingsPage() } break; - case FrontendCommon::PostProcessingShader::Option::Type::Int: + case PostProcessingShader::Option::Type::Int: { tstr.Fmt(ICON_FA_RULER_VERTICAL "{}##{}", opt.ui_name, opt.name); str.Fmt(FSUI_FSTR("Value: {} | Default: {} | Minimum: {} | Maximum: {}"), opt.value[0].int_value, @@ -4293,9 +4293,9 @@ void FullscreenUI::DrawPostProcessingSettingsPage() { case POSTPROCESSING_ACTION_REMOVE: { - FrontendCommon::PostProcessingShader& stage = s_postprocessing_chain.GetShaderStage(postprocessing_action_index); + PostProcessingShader* stage = s_postprocessing_chain.GetShaderStage(postprocessing_action_index); ShowToast(std::string(), - fmt::format(FSUI_FSTR("Removed stage {} ({})."), postprocessing_action_index + 1, stage.GetName())); + fmt::format(FSUI_FSTR("Removed stage {} ({})."), postprocessing_action_index + 1, stage->GetName())); s_postprocessing_chain.RemoveStage(postprocessing_action_index); SavePostProcessingChain(); } @@ -4598,7 +4598,9 @@ void FullscreenUI::DrawAchievementsSettingsPage() EndMenuButtons(); } -void FullscreenUI::DrawAchievementsLoginWindow() {} +void FullscreenUI::DrawAchievementsLoginWindow() +{ +} #endif @@ -5016,15 +5018,16 @@ void FullscreenUI::PopulateSaveStateScreenshot(SaveStateListEntry* li, const Ext li->preview_texture.reset(); if (ssi && !ssi->screenshot_data.empty()) { - li->preview_texture = - g_host_display->CreateTexture(ssi->screenshot_width, ssi->screenshot_height, 1, 1, 1, GPUTexture::Format::RGBA8, - ssi->screenshot_data.data(), sizeof(u32) * ssi->screenshot_width, false); + li->preview_texture = g_gpu_device->CreateTexture( + ssi->screenshot_width, ssi->screenshot_height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, + ssi->screenshot_data.data(), sizeof(u32) * ssi->screenshot_width, false); } else { - li->preview_texture = g_host_display->CreateTexture( - Resources::PLACEHOLDER_ICON_WIDTH, Resources::PLACEHOLDER_ICON_HEIGHT, 1, 1, 1, 
GPUTexture::Format::RGBA8, - Resources::PLACEHOLDER_ICON_DATA, sizeof(u32) * Resources::PLACEHOLDER_ICON_WIDTH, false); + li->preview_texture = g_gpu_device->CreateTexture( + Resources::PLACEHOLDER_ICON_WIDTH, Resources::PLACEHOLDER_ICON_HEIGHT, 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, Resources::PLACEHOLDER_ICON_DATA, sizeof(u32) * Resources::PLACEHOLDER_ICON_WIDTH, + false); } if (!li->preview_texture) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 239002a4f..6887d1234 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -8,13 +8,13 @@ #include "common/string_util.h" #include "dma.h" #include "host.h" -#include "util/host_display.h" #include "imgui.h" #include "interrupt_controller.h" #include "settings.h" #include "stb_image_write.h" #include "system.h" #include "timers.h" +#include "util/gpu_device.h" #include "util/state_wrapper.h" #include Log_SetChannel(GPU); @@ -27,8 +27,8 @@ GPU::GPU() = default; GPU::~GPU() { - if (g_host_display) - g_host_display->SetGPUTimingEnabled(false); + if (g_gpu_device) + g_gpu_device->SetGPUTimingEnabled(false); } bool GPU::Initialize() @@ -49,12 +49,12 @@ bool GPU::Initialize() UpdateCRTCConfig(); if (g_settings.display_post_processing && !g_settings.display_post_process_chain.empty() && - !g_host_display->SetPostProcessingChain(g_settings.display_post_process_chain)) + !g_gpu_device->SetPostProcessingChain(g_settings.display_post_process_chain)) { Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Failed to load post processing shader chain."), 20.0f); } - g_host_display->SetGPUTimingEnabled(g_settings.display_show_gpu); + g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu); return true; } @@ -75,13 +75,7 @@ void GPU::UpdateSettings() // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); - g_host_display->SetGPUTimingEnabled(g_settings.display_show_gpu); -} - -bool GPU::IsHardwareRenderer() -{ - const GPURenderer renderer = GetRendererType(); - 
return (renderer != GPURenderer::Software); + g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu); } void GPU::CPUClockChanged() @@ -89,7 +83,9 @@ void GPU::CPUClockChanged() UpdateCRTCConfig(); } -void GPU::UpdateResolutionScale() {} +void GPU::UpdateResolutionScale() +{ +} std::tuple GPU::GetEffectiveDisplayResolution(bool scaled /* = true */) { @@ -168,6 +164,8 @@ void GPU::SoftReset() bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) { + FlushRender(); + if (sw.IsReading()) { // perform a reset to discard all pending draws/fb state @@ -293,9 +291,9 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ return !sw.HasError(); } -void GPU::ResetGraphicsAPIState() {} - -void GPU::RestoreGraphicsAPIState() {} +void GPU::RestoreGraphicsAPIState() +{ +} void GPU::UpdateDMARequest() { @@ -980,8 +978,8 @@ void GPU::UpdateCommandTickEvent() bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, float x_scale, u32* out_tick, u32* out_line) const { - auto [display_x, display_y] = g_host_display->ConvertWindowCoordinatesToDisplayCoordinates( - window_x, window_y, g_host_display->GetWindowWidth(), g_host_display->GetWindowHeight()); + auto [display_x, display_y] = g_gpu_device->ConvertWindowCoordinatesToDisplayCoordinates( + window_x, window_y, g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()); if (x_scale != 1.0f) { @@ -1284,11 +1282,17 @@ void GPU::HandleGetGPUInfoCommand(u32 value) } } -void GPU::ClearDisplay() {} +void GPU::ClearDisplay() +{ +} -void GPU::UpdateDisplay() {} +void GPU::UpdateDisplay() +{ +} -void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} +void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ +} void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { @@ -1446,9 +1450,13 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he } } -void GPU::DispatchRenderCommand() {} +void 
GPU::DispatchRenderCommand() +{ +} -void GPU::FlushRender() {} +void GPU::FlushRender() +{ +} void GPU::SetDrawMode(u16 value) { @@ -1687,4 +1695,6 @@ void GPU::DrawDebugStateWindow() ImGui::End(); } -void GPU::DrawRendererStats(bool is_idle_frame) {} +void GPU::DrawRendererStats(bool is_idle_frame) +{ +} diff --git a/src/core/gpu.h b/src/core/gpu.h index 75ac564ec..70aa54855 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -17,13 +17,12 @@ class StateWrapper; -class HostDisplay; +class GPUDevice; class GPUTexture; class TimingEvent; -namespace Threading -{ +namespace Threading { class Thread; } @@ -80,21 +79,20 @@ public: GPU(); virtual ~GPU(); - virtual GPURenderer GetRendererType() const = 0; virtual const Threading::Thread* GetSWThread() const = 0; + virtual bool IsHardwareRenderer() const = 0; virtual bool Initialize(); virtual void Reset(bool clear_vram); virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display); // Graphics API state reset/restore - call when drawing the UI etc. - virtual void ResetGraphicsAPIState(); + // TODO: replace with "invalidate cached state" virtual void RestoreGraphicsAPIState(); // Render statistics debug window. void DrawDebugStateWindow(); - bool IsHardwareRenderer(); void CPUClockChanged(); // MMIO access @@ -161,25 +159,7 @@ public: float ComputeVerticalFrequency() const; float GetDisplayAspectRatio() const; -#ifdef _WIN32 - // gpu_hw_d3d11.cpp - static std::unique_ptr CreateHardwareD3D11Renderer(); - - // gpu_hw_d3d12.cpp - static std::unique_ptr CreateHardwareD3D12Renderer(); -#endif - -#ifdef WITH_OPENGL - // gpu_hw_opengl.cpp - static std::unique_ptr CreateHardwareOpenGLRenderer(); -#endif - -#ifdef WITH_VULKAN - // gpu_hw_vulkan.cpp - static std::unique_ptr CreateHardwareVulkanRenderer(); -#endif - - // gpu_sw.cpp + static std::unique_ptr CreateHardwareRenderer(); static std::unique_ptr CreateSoftwareRenderer(); // Converts window coordinates into horizontal ticks and scanlines. 
Returns false if out of range. Used for lightguns. @@ -192,6 +172,9 @@ public: // Dumps raw VRAM to a file. bool DumpVRAMToFile(const char* filename); + // Ensures all buffered vertices are drawn. + virtual void FlushRender(); + protected: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -291,7 +274,6 @@ protected: virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); virtual void DispatchRenderCommand(); - virtual void FlushRender(); virtual void ClearDisplay(); virtual void UpdateDisplay(); virtual void DrawRendererStats(bool is_idle_frame); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index b0a3f98ac..b094d65ba 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1,11 +1,14 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_hw.h" #include "common/align.h" #include "common/assert.h" #include "common/log.h" +#include "common/scoped_guard.h" +#include "common/string_util.h" #include "cpu_core.h" +#include "gpu_hw_shadergen.h" #include "gpu_sw_backend.h" #include "host.h" #include "imgui.h" @@ -18,6 +21,15 @@ #include Log_SetChannel(GPU_HW); +// TODO: instead of full state restore, only restore what changed + +static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; +static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16; + +#ifdef _DEBUG +static u32 s_draw_number = 0; +#endif + template ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) { @@ -38,13 +50,77 @@ ALWAYS_INLINE static bool ShouldDisableColorPerspective() return g_settings.gpu_pgxp_enable && 
g_settings.gpu_pgxp_texture_correction && !g_settings.gpu_pgxp_color_correction; } +/// Returns true if the specified texture filtering mode requires dual-source blending. +static bool TextureFilterRequiresDualSourceBlend(GPUTextureFilter filter) +{ + return (filter == GPUTextureFilter::Bilinear || filter == GPUTextureFilter::JINC2 || filter == GPUTextureFilter::xBR); +} + +/// Computes the area affected by a VRAM transfer, including wrap-around of X. +static Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) +{ + Common::Rectangle out_rc = Common::Rectangle::FromExtents(x % VRAM_WIDTH, y % VRAM_HEIGHT, width, height); + if (out_rc.right > VRAM_WIDTH) + { + out_rc.left = 0; + out_rc.right = VRAM_WIDTH; + } + if (out_rc.bottom > VRAM_HEIGHT) + { + out_rc.top = 0; + out_rc.bottom = VRAM_HEIGHT; + } + return out_rc; +} + +namespace { +class ShaderCompileProgressTracker +{ +public: + ShaderCompileProgressTracker(std::string title, u32 total) + : m_title(std::move(title)), m_min_time(Common::Timer::ConvertSecondsToValue(1.0)), + m_update_interval(Common::Timer::ConvertSecondsToValue(0.1)), m_start_time(Common::Timer::GetCurrentValue()), + m_last_update_time(0), m_progress(0), m_total(total) + { + } + ~ShaderCompileProgressTracker() = default; + + void Increment() + { + m_progress++; + + const u64 tv = Common::Timer::GetCurrentValue(); + if ((tv - m_start_time) >= m_min_time && (tv - m_last_update_time) >= m_update_interval) + { + Host::DisplayLoadingScreen(m_title.c_str(), 0, static_cast(m_total), static_cast(m_progress)); + m_last_update_time = tv; + } + } + +private: + std::string m_title; + u64 m_min_time; + u64 m_update_interval; + u64 m_start_time; + u64 m_last_update_time; + u32 m_progress; + u32 m_total; +}; +} // namespace + GPU_HW::GPU_HW() : GPU() { m_vram_ptr = m_vram_shadow.data(); + +#ifdef _DEBUG + s_draw_number = 0; +#endif } GPU_HW::~GPU_HW() { + g_gpu_device->ClearDisplayTexture(); + if (m_sw_renderer) { 
m_sw_renderer->Shutdown(); @@ -57,14 +133,24 @@ const Threading::Thread* GPU_HW::GetSWThread() const return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr; } +bool GPU_HW::IsHardwareRenderer() const +{ + return true; +} + bool GPU_HW::Initialize() { if (!GPU::Initialize()) return false; + const GPUDevice::Features features = g_gpu_device->GetFeatures(); + m_max_resolution_scale = g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH; + m_supports_dual_source_blend = features.dual_source_blend; + m_supports_per_sample_shading = features.per_sample_shading; + m_supports_disable_color_perspective = features.noperspective_interpolation; + m_resolution_scale = CalculateResolutionScale(); - m_multisamples = std::min(g_settings.gpu_multisamples, m_max_multisamples); - m_render_api = g_host_display->GetRenderAPI(); + m_multisamples = std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples()); m_per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading; m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; @@ -76,8 +162,7 @@ bool GPU_HW::Initialize() if (m_multisamples != g_settings.gpu_multisamples) { - Host::AddFormattedOSDMessage(20.0f, - TRANSLATE("OSDMessage", "%ux MSAA is not supported, using %ux instead."), + Host::AddFormattedOSDMessage(20.0f, TRANSLATE("OSDMessage", "%ux MSAA is not supported, using %ux instead."), g_settings.gpu_multisamples, m_multisamples); } if (!m_per_sample_shading && g_settings.gpu_per_sample_shading) @@ -91,14 +176,7 @@ bool GPU_HW::Initialize() Settings::GetTextureFilterDisplayName(m_texture_filtering)); m_texture_filtering = GPUTextureFilter::Nearest; } - if (!m_supports_adaptive_downsampling && g_settings.gpu_resolution_scale > 1 && - g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive) - { - Host::AddOSDMessage( - TRANSLATE_STR( - "OSDMessage", "Adaptive downsampling is not supported with the current renderer, using box filter instead."), - 
20.0f); - } + if (!m_supports_disable_color_perspective && !ShouldDisableColorPerspective()) Log_WarningPrint("Disable color perspective not supported, but should be used."); @@ -107,6 +185,20 @@ bool GPU_HW::Initialize() UpdateSoftwareRenderer(false); PrintSettingsToLog(); + + if (!CompilePipelines()) + { + Log_ErrorPrintf("Failed to compile pipelines"); + return false; + } + + if (!CreateBuffers()) + { + Log_ErrorPrintf("Failed to create framebuffer"); + return false; + } + + RestoreGraphicsAPIState(); return true; } @@ -125,7 +217,8 @@ void GPU_HW::Reset(bool clear_vram) m_batch_ubo_dirty = true; m_current_depth = 1; - SetFullVRAMDirtyRectangle(); + if (clear_vram) + ClearFramebuffer(); } bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) @@ -133,6 +226,42 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di if (!GPU::DoState(sw, host_texture, update_display)) return false; + if (host_texture) + { + GPUTexture* tex = *host_texture; + if (sw.IsReading()) + { + if (tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || + tex->GetSamples() != m_vram_texture->GetSamples()) + { + return false; + } + + g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, tex, 0, 0, 0, 0, tex->GetWidth(), + tex->GetHeight()); + } + else + { + if (!tex || tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || + tex->GetSamples() != m_vram_texture->GetSamples()) + { + delete tex; + + tex = + g_gpu_device + ->CreateTexture(m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), + GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, nullptr, 0, false) + .release(); + *host_texture = tex; + if (!tex) + return false; + } + + g_gpu_device->CopyTextureRegion(tex, 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0, tex->GetWidth(), + tex->GetHeight()); + } + } + // invalidate the whole VRAM 
read texture when loading state if (sw.IsReading()) { @@ -144,18 +273,27 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di return true; } -void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) +void GPU_HW::RestoreGraphicsAPIState() +{ + g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); + g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + SetScissor(); + m_batch_ubo_dirty = true; +} + +void GPU_HW::UpdateSettings() { const u32 resolution_scale = CalculateResolutionScale(); - const u32 multisamples = std::min(m_max_multisamples, g_settings.gpu_multisamples); + const u32 multisamples = std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples()); const bool per_sample_shading = g_settings.gpu_per_sample_shading && m_supports_per_sample_shading; const GPUDownsampleMode downsample_mode = GetDownsampleMode(resolution_scale); const bool use_uv_limits = ShouldUseUVLimits(); const bool disable_color_perspective = m_supports_disable_color_perspective && ShouldDisableColorPerspective(); - *framebuffer_changed = + const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_downsample_mode != downsample_mode); - *shaders_changed = + const bool shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading || m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter || @@ -166,10 +304,10 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) if (m_resolution_scale != resolution_scale) { Host::AddKeyedFormattedOSDMessage( - "ResolutionScale", 10.0f, - TRANSLATE("OSDMessage", "Resolution scale set to %ux (display 
%ux%u, VRAM %ux%u)"), resolution_scale, - m_crtc_state.display_vram_width * resolution_scale, resolution_scale * m_crtc_state.display_vram_height, - VRAM_WIDTH * resolution_scale, VRAM_HEIGHT * resolution_scale); + "ResolutionScale", 10.0f, TRANSLATE("OSDMessage", "Resolution scale set to %ux (display %ux%u, VRAM %ux%u)"), + resolution_scale, m_crtc_state.display_vram_width * resolution_scale, + resolution_scale * m_crtc_state.display_vram_height, VRAM_WIDTH * resolution_scale, + VRAM_HEIGHT * resolution_scale); } if (m_multisamples != multisamples || m_per_sample_shading != per_sample_shading) @@ -177,17 +315,24 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) if (per_sample_shading) { Host::AddKeyedFormattedOSDMessage( - "Multisampling", 10.0f, TRANSLATE("OSDMessage", "Multisample anti-aliasing set to %ux (SSAA)."), - multisamples); + "Multisampling", 10.0f, TRANSLATE("OSDMessage", "Multisample anti-aliasing set to %ux (SSAA)."), multisamples); } else { Host::AddKeyedFormattedOSDMessage("Multisampling", 10.0f, - TRANSLATE("OSDMessage", "Multisample anti-aliasing set to %ux."), - multisamples); + TRANSLATE("OSDMessage", "Multisample anti-aliasing set to %ux."), multisamples); } } + // Back up VRAM if we're recreating the framebuffer. 
+ if (framebuffer_changed) + { + RestoreGraphicsAPIState(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + g_gpu_device->ClearDisplayTexture(); + DestroyBuffers(); + } + m_resolution_scale = resolution_scale; m_multisamples = multisamples; m_per_sample_shading = per_sample_shading; @@ -213,6 +358,25 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) UpdateSoftwareRenderer(true); PrintSettingsToLog(); + + if (shaders_changed) + { + DestroyPipelines(); + if (!CompilePipelines()) + Panic("Failed to recompile pipelnes."); + } + + if (framebuffer_changed) + { + // TODO: weird vram loss when rapidly changing resolutions + if (!CreateBuffers()) + Panic("Failed to recreate buffers."); + + RestoreGraphicsAPIState(); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); + UpdateDepthBufferFromMaskBit(); + UpdateDisplay(); + } } u32 GPU_HW::CalculateResolutionScale() const @@ -231,14 +395,13 @@ u32 GPU_HW::CalculateResolutionScale() const (m_console_is_pal ? 
(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START) : (NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START)); const s32 preferred_scale = - static_cast(std::ceil(static_cast(g_host_display->GetWindowHeight()) / height)); + static_cast(std::ceil(static_cast(g_gpu_device->GetWindowHeight()) / height)); Log_InfoPrintf("Height = %d, preferred scale = %d", height, preferred_scale); scale = static_cast(std::clamp(preferred_scale, 1, m_max_resolution_scale)); } - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && m_supports_adaptive_downsampling && scale > 1 && - !Common::IsPow2(scale)) + if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) { const u32 new_scale = Common::PreviousPow2(scale); Log_InfoPrintf("Resolution scale %ux not supported for adaptive smoothing, using %ux", scale, new_scale); @@ -246,9 +409,8 @@ u32 GPU_HW::CalculateResolutionScale() const if (g_settings.gpu_resolution_scale != 0) { Host::AddFormattedOSDMessage( - 10.0f, - TRANSLATE("OSDMessage", "Resolution scale %ux not supported for adaptive smoothing, using %ux."), - scale, new_scale); + 10.0f, TRANSLATE("OSDMessage", "Resolution scale %ux not supported for adaptive smoothing, using %ux."), scale, + new_scale); } scale = new_scale; @@ -267,13 +429,28 @@ void GPU_HW::UpdateResolutionScale() GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const { - if (resolution_scale == 1) - return GPUDownsampleMode::Disabled; + return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode; +} - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive) - return m_supports_adaptive_downsampling ? 
GPUDownsampleMode::Adaptive : GPUDownsampleMode::Box; +bool GPU_HW::IsUsingMultisampling() const +{ + return m_multisamples > 1; +} - return g_settings.gpu_downsample_mode; +bool GPU_HW::IsUsingDownsampling() const +{ + return (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24); +} + +void GPU_HW::SetFullVRAMDirtyRectangle() +{ + m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + m_draw_mode.SetTexturePageChanged(); +} + +void GPU_HW::ClearVRAMDirtyRectangle() +{ + m_vram_dirty_rect.SetInvalid(); } std::tuple GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) @@ -303,12 +480,669 @@ void GPU_HW::PrintSettingsToLog() Log_InfoPrintf("Using software renderer for readbacks: %s", m_sw_renderer ? "YES" : "NO"); } +bool GPU_HW::CreateBuffers() +{ + DestroyBuffers(); + + // scale vram size to internal resolution + const u32 texture_width = VRAM_WIDTH * m_resolution_scale; + const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; + const u8 samples = static_cast(m_multisamples); + + if (!(m_vram_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, samples, + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + !(m_vram_depth_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, samples, + GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) || + !(m_vram_read_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1, + GPUTexture::Type::Texture, VRAM_RT_FORMAT)) || + !(m_display_texture = g_gpu_device->CreateTexture( + ((m_downsample_mode == GPUDownsampleMode::Adaptive) ? 
VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) * + m_resolution_scale, + GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + !(m_vram_readback_texture = g_gpu_device->CreateTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT))) + { + return false; + } + + GL_OBJECT_NAME(m_vram_texture, "VRAM Texture"); + GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); + GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture"); + GL_OBJECT_NAME(m_display_texture, "Display Texture"); + GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); + + // vram framebuffer has both colour and depth + if (!(m_vram_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_texture.get(), m_vram_depth_texture.get())) || + !(m_vram_update_depth_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_depth_texture.get())) || + !(m_vram_readback_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_readback_texture.get())) || + !(m_display_framebuffer = g_gpu_device->CreateFramebuffer(m_display_texture.get()))) + { + return false; + } + + GL_OBJECT_NAME(m_vram_framebuffer, "VRAM Framebuffer"); + GL_OBJECT_NAME(m_vram_update_depth_framebuffer, "VRAM Update Depth Framebuffer"); + GL_OBJECT_NAME(m_vram_readback_framebuffer, "VRAM Readback Framebuffer"); + GL_OBJECT_NAME(m_display_framebuffer, "Display Framebuffer"); + + if (!(m_vram_upload_buffer = g_gpu_device->CreateTextureBuffer(GPUTextureBuffer::Format::R16UI, + VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16)))) + { + return false; + } + + Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height); + + if (m_downsample_mode == GPUDownsampleMode::Adaptive) + { + const u32 levels = GetAdaptiveDownsamplingMipLevels(); + + if (!(m_downsample_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, levels, 1, + GPUTexture::Type::Texture, VRAM_RT_FORMAT)) || + !(m_downsample_render_texture = 
g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1, + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())) || + !(m_downsample_weight_texture = + g_gpu_device->CreateTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1, + GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)) || + !(m_downsample_weight_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_weight_texture.get()))) + { + return false; + } + } + else if (m_downsample_mode == GPUDownsampleMode::Box) + { + if (!(m_downsample_render_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get()))) + { + return false; + } + } + + g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); + SetFullVRAMDirtyRectangle(); + return true; +} + +void GPU_HW::ClearFramebuffer() +{ + g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); + g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 
1.0f : 0.0f); + ClearVRAMDirtyRectangle(); + g_gpu_device->ClearRenderTarget(m_display_texture.get(), 0); + m_last_depth_z = 1.0f; +} + +void GPU_HW::DestroyBuffers() +{ + m_vram_upload_buffer.reset(); + m_downsample_weight_framebuffer.reset(); + m_downsample_weight_texture.reset(); + m_downsample_framebuffer.reset(); + m_downsample_render_texture.reset(); + m_downsample_texture.reset(); + m_display_framebuffer.reset(); + m_vram_readback_framebuffer.reset(); + m_vram_update_depth_framebuffer.reset(); + m_vram_framebuffer.reset(); + m_vram_read_texture.reset(); + m_vram_depth_view.reset(); + m_vram_depth_texture.reset(); + m_vram_texture.reset(); + m_vram_readback_texture.reset(); + m_display_texture.reset(); +} + +bool GPU_HW::CompilePipelines() +{ + const GPUDevice::Features features = g_gpu_device->GetFeatures(); + GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, + m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, + m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); + + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); + + // vertex shaders - [textured] + // fragment shaders - [render_mode][texture_mode][dithering][interlacing] + static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; + DimensionalArray, 2> batch_vertex_shaders{}; + DimensionalArray, 2, 2, 9, 4> batch_fragment_shaders{}; + ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { + batch_vertex_shaders.enumerate(destroy_shader); + batch_fragment_shaders.enumerate(destroy_shader); + }); + + for (u8 textured = 0; textured < 2; textured++) + { + const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); + if (!(batch_vertex_shaders[textured] = g_gpu_device->CreateShader(GPUShaderStage::Vertex, 
vs))) + return false; + + progress.Increment(); + } + + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(texture_mode), + ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); + + if (!(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + { + return false; + } + + progress.Increment(); + } + } + } + } + + static constexpr GPUPipeline::VertexAttribute vertex_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, + GPUPipeline::VertexAttribute::Type::Float, 4, offsetof(BatchVertex, x)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::Color, 0, + GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)), + GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, + GPUPipeline::VertexAttribute::Type::UInt32, 1, offsetof(BatchVertex, u)), + GPUPipeline::VertexAttribute::Make(3, GPUPipeline::VertexAttribute::Semantic::TexCoord, 1, + GPUPipeline::VertexAttribute::Type::UInt32, 1, offsetof(BatchVertex, texpage)), + GPUPipeline::VertexAttribute::Make(4, GPUPipeline::VertexAttribute::Semantic::TexCoord, 2, + GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, uv_limits)), + }; + static constexpr u32 NUM_BATCH_VERTEX_ATTRIBUTES = 2; + static constexpr u32 NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES = 4; + static constexpr u32 NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES = 5; + + GPUPipeline::GraphicsConfig plconfig = {}; + plconfig.layout = GPUPipeline::Layout::SingleTextureAndUBO; + plconfig.input_layout.vertex_stride = 
sizeof(BatchVertex); + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.color_format = VRAM_RT_FORMAT; + plconfig.depth_format = VRAM_DS_FORMAT; + plconfig.samples = m_multisamples; + plconfig.per_sample_shading = m_per_sample_shading; + + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + for (u8 depth_test = 0; depth_test < 3; depth_test++) + { + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + static constexpr std::array depth_test_values = { + GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, + GPUPipeline::DepthFunc::LessEqual}; + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + + plconfig.input_layout.vertex_attributes = + textured ? + (m_using_uv_limits ? gsl::span( + vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : + gsl::span( + vertex_attributes, NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : + gsl::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); + + plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); + plconfig.fragment_shader = + batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].get(); + + // TODO: Depth write always on??? 
+ plconfig.depth.depth_test = depth_test_values[depth_test]; + plconfig.depth.depth_write = true; + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + + if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + m_texture_filtering != GPUTextureFilter::Nearest) + { + plconfig.blend.enable = true; + plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero; + plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add; + + if (m_supports_dual_source_blend) + { + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; + } + else + { + const u32 factor = (static_cast(transparency_mode) == + GPUTransparencyMode::HalfBackgroundPlusHalfForeground) ? + 0xFF808080u : + 0xFFFFFFFFu; + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? 
+ GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; + plconfig.blend.constant = factor; + } + } + + if (!(m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering] + [interlacing] = g_gpu_device->CreatePipeline(plconfig))) + { + return false; + } + + progress.Increment(); + } + } + } + } + } + } + + batch_shader_guard.Run(); + + std::unique_ptr fullscreen_quad_vertex_shader = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); + std::unique_ptr uv_quad_vertex_shader = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateUVQuadVertexShader()); + if (!fullscreen_quad_vertex_shader || !uv_quad_vertex_shader) + return false; + + progress.Increment(); + + // common state + plconfig.input_layout.vertex_attributes = {}; + plconfig.input_layout.vertex_stride = 0; + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.per_sample_shading = false; + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.vertex_shader = fullscreen_quad_vertex_shader.get(); + + // VRAM fill + for (u8 wrapped = 0; wrapped < 2; wrapped++) + { + for (u8 interlaced = 0; interlaced < 2; interlaced++) + { + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, + shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced))); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); + + if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig))) + return false; + + progress.Increment(); + } + } + + // VRAM copy + { + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMCopyFragmentShader()); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + 
plconfig.depth.depth_write = true; + plconfig.depth.depth_test = + (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; + + if (!(m_vram_copy_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_vram_copy_pipelines[depth_test], "VRAM Write Pipeline, depth=%u", depth_test); + + progress.Increment(); + } + } + + // VRAM write + { + const bool use_ssbo = features.texture_buffers_emulated_with_ssbo; + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMWriteFragmentShader(use_ssbo)); + if (!fs) + return false; + + plconfig.layout = GPUPipeline::Layout::SingleTextureBufferAndPushConstants; + plconfig.fragment_shader = fs.get(); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + plconfig.depth.depth_write = true; + plconfig.depth.depth_test = + (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; + + if (!(m_vram_write_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_vram_write_pipelines[depth_test], "VRAM Write Pipeline, depth=%u", depth_test); + + progress.Increment(); + } + } + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + + // VRAM update depth + { + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMUpdateDepthFragmentShader()); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + plconfig.color_format = GPUTexture::Format::Unknown; + plconfig.depth_format = VRAM_DS_FORMAT; + plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); + plconfig.blend.write_mask = 0; + + if (!(m_vram_update_depth_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_vram_update_depth_pipeline, "VRAM Update Depth Pipeline"); + + progress.Increment(); + } + + plconfig.color_format = VRAM_RT_FORMAT; + plconfig.depth_format = 
GPUTexture::Format::Unknown; + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.samples = 1; + plconfig.per_sample_shading = false; + + // VRAM read + { + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMReadFragmentShader()); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + + if (!(m_vram_readback_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_vram_readback_pipeline, "VRAM Read Pipeline"); + progress.Increment(); + } + + // Display + { + for (u8 depth_24 = 0; depth_24 < 2; depth_24++) + { + for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) + { + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, + shadergen.GenerateDisplayFragmentShader( + ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode), m_chroma_smoothing)); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + + if (!(m_display_pipelines[depth_24][interlace_mode] = g_gpu_device->CreatePipeline(plconfig))) + return false; + + progress.Increment(); + } + } + } + + { + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateCopyFragmentShader()); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + if (!(m_copy_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + } + + if (m_downsample_mode == GPUDownsampleMode::Adaptive) + { + std::unique_ptr vs = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateAdaptiveDownsampleVertexShader()); + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); + if (!vs || !fs) + return false; + GL_OBJECT_NAME(fs, "Downsample Vertex Shader"); + GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); + plconfig.vertex_shader = vs.get(); + 
plconfig.fragment_shader = fs.get(); + if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); + + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); + if (!fs) + return false; + GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader"); + plconfig.fragment_shader = fs.get(); + if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline"); + + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); + if (!fs) + return false; + GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader"); + plconfig.fragment_shader = fs.get(); + plconfig.color_format = GPUTexture::Format::R8; + if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline"); + + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); + if (!fs) + return false; + GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader"); + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.fragment_shader = fs.get(); + plconfig.color_format = VRAM_RT_FORMAT; + if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline"); + + GPUSampler::Config config = GPUSampler::GetLinearConfig(); + config.min_lod = 0; + config.max_lod = GPUSampler::Config::LOD_MAX; + if (!(m_downsample_lod_sampler = g_gpu_device->CreateSampler(config))) + return false; + GL_OBJECT_NAME(m_downsample_lod_sampler, "Downsample LOD Sampler"); + 
config.mip_filter = GPUSampler::Filter::Linear; + if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config))) + return false; + GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler"); + } + else if (m_downsample_mode == GPUDownsampleMode::Box) + { + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateBoxSampleDownsampleFragmentShader()); + if (!fs) + return false; + + GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); + plconfig.fragment_shader = fs.get(); + + if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); + } + + progress.Increment(); + +#undef UPDATE_PROGRESS + + return true; +} + +void GPU_HW::DestroyPipelines() +{ + static constexpr auto destroy = [](std::unique_ptr& p) { p.reset(); }; + + m_batch_pipelines.enumerate(destroy); + + m_vram_fill_pipelines.enumerate(destroy); + + for (std::unique_ptr& p : m_vram_write_pipelines) + destroy(p); + + for (std::unique_ptr& p : m_vram_copy_pipelines) + destroy(p); + + destroy(m_vram_readback_pipeline); + destroy(m_vram_update_depth_pipeline); + + destroy(m_downsample_first_pass_pipeline); + destroy(m_downsample_mid_pass_pipeline); + destroy(m_downsample_blur_pass_pipeline); + destroy(m_downsample_composite_pass_pipeline); + m_downsample_composite_sampler.reset(); + + m_display_pipelines.enumerate(destroy); +} + void GPU_HW::UpdateVRAMReadTexture() { + const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + + if (m_vram_texture->IsMultisampled()) + { + if (g_gpu_device->GetFeatures().partial_msaa_resolve) + { + g_gpu_device->ResolveTextureRegion(m_vram_read_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0, + m_vram_texture.get(), scaled_rect.left, scaled_rect.top, + scaled_rect.GetWidth(), scaled_rect.GetHeight()); + } + else + { + 
g_gpu_device->ResolveTextureRegion(m_vram_read_texture.get(), 0, 0, 0, 0, m_vram_texture.get(), 0, 0, + m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + } + } + else + { + g_gpu_device->CopyTextureRegion(m_vram_read_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0, + m_vram_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0, + scaled_rect.GetWidth(), scaled_rect.GetHeight()); + } + m_renderer_stats.num_vram_read_texture_updates++; ClearVRAMDirtyRectangle(); } +void GPU_HW::UpdateDepthBufferFromMaskBit() +{ + if (m_pgxp_depth_buffer) + return; + + // Viewport should already be set full, only need to fudge the scissor. + g_gpu_device->SetScissor(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + g_gpu_device->SetFramebuffer(m_vram_update_depth_framebuffer.get()); + g_gpu_device->SetPipeline(m_vram_update_depth_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->Draw(3, 0); + + // Restore. 
+ g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); + SetScissor(); +} + +void GPU_HW::ClearDepthBuffer() +{ + DebugAssert(m_pgxp_depth_buffer); + + g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f); + m_last_depth_z = 1.0f; +} + +void GPU_HW::SetScissor() +{ + const s32 left = m_drawing_area.left * m_resolution_scale; + const s32 right = std::max((m_drawing_area.right + 1) * m_resolution_scale, left + 1); + const s32 top = m_drawing_area.top * m_resolution_scale; + const s32 bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, top + 1); + + g_gpu_device->SetScissor(left, top, right - left, bottom - top); +} + +void GPU_HW::MapBatchVertexPointer(u32 required_vertices) +{ + DebugAssert(!m_batch_start_vertex_ptr); + + void* map; + u32 space; + g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &map, &space, &m_batch_base_vertex); + + m_batch_start_vertex_ptr = static_cast(map); + m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; + m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + space; +} + +void GPU_HW::UnmapBatchVertexPointer(u32 used_vertices) +{ + DebugAssert(m_batch_start_vertex_ptr); + g_gpu_device->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices); + m_batch_start_vertex_ptr = nullptr; + m_batch_end_vertex_ptr = nullptr; + m_batch_current_vertex_ptr = nullptr; +} + +void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) +{ + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + const u8 depth_test = m_batch.use_depth_buffer ? 
static_cast(2) : BoolToUInt8(m_batch.check_mask_before_draw); + g_gpu_device->SetPipeline( + m_batch_pipelines[depth_test][static_cast(render_mode)][static_cast(m_batch.texture_mode)][static_cast( + m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] + .get()); + g_gpu_device->Draw(num_vertices, base_vertex); +} + +void GPU_HW::ClearDisplay() +{ + g_gpu_device->ClearDisplayTexture(); + g_gpu_device->ClearRenderTarget(m_display_texture.get(), 0xFF000000u); +} + void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) { // Taken from beetle-psx gpu_polygon.cpp @@ -469,23 +1303,6 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const return levels; } -GPU_HW::SmoothingUBOData GPU_HW::GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width, - u32 tex_height) const -{ - const float rcp_width = 1.0f / static_cast(tex_width >> level); - const float rcp_height = 1.0f / static_cast(tex_height >> level); - - SmoothingUBOData data; - data.min_uv[0] = static_cast(left >> level) * rcp_width; - data.min_uv[1] = static_cast(top >> level) * rcp_height; - data.max_uv[0] = static_cast((left + width) >> level) * rcp_width; - data.max_uv[1] = static_cast((top + height) >> level) * rcp_height; - data.rcp_size[0] = rcp_width; - data.rcp_size[1] = rcp_height; - - return data; -} - void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth) { const float dx = x1 - x0; @@ -982,82 +1799,38 @@ void GPU_HW::LoadVertices() } } -void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) +bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, + u32 height) { - *left = m_drawing_area.left * m_resolution_scale; - *right = std::max((m_drawing_area.right + 1) * m_resolution_scale, *left + 1); - *top = m_drawing_area.top * m_resolution_scale; - *bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, 
*top + 1); -} - -GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const -{ - // drop precision unless true colour is enabled - if (!m_true_color) - color = VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)); - - VRAMFillUBOData uniforms; - uniforms.u_dst_x = (x % VRAM_WIDTH) * m_resolution_scale; - uniforms.u_dst_y = (y % VRAM_HEIGHT) * m_resolution_scale; - uniforms.u_end_x = ((x + width) % VRAM_WIDTH) * m_resolution_scale; - uniforms.u_end_y = ((y + height) % VRAM_HEIGHT) * m_resolution_scale; - std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = - RGBA8ToFloat(color); - - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); - return uniforms; -} - -Common::Rectangle GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) const -{ - Common::Rectangle out_rc = Common::Rectangle::FromExtents(x % VRAM_WIDTH, y % VRAM_HEIGHT, width, height); - if (out_rc.right > VRAM_WIDTH) + if (!m_vram_replacement_texture || m_vram_replacement_texture->GetWidth() < tex->GetWidth() || + m_vram_replacement_texture->GetHeight() < tex->GetHeight()) { - out_rc.left = 0; - out_rc.right = VRAM_WIDTH; + m_vram_replacement_texture.reset(); + + if (!(m_vram_replacement_texture = + g_gpu_device->CreateTexture(tex->GetWidth(), tex->GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, tex->GetPixels(), tex->GetPitch(), true))) + { + return false; + } } - if (out_rc.bottom > VRAM_HEIGHT) + else { - out_rc.top = 0; - out_rc.bottom = VRAM_HEIGHT; + if (!m_vram_replacement_texture->Update(0, 0, width, height, tex->GetPixels(), tex->GetPitch())) + { + Log_ErrorPrintf("Update %ux%u texture failed.", width, height); + return false; + } } - return out_rc; -} -GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset, - bool set_mask, bool check_mask) const -{ - const VRAMWriteUBOData uniforms = { 
- (x % VRAM_WIDTH), (y % VRAM_HEIGHT), ((x + width) % VRAM_WIDTH), ((y + height) % VRAM_HEIGHT), width, - height, buffer_offset, (set_mask) ? 0x8000u : 0x00, GetCurrentNormalizedVertexDepth()}; - return uniforms; -} + g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); // TODO: needed? + g_gpu_device->SetTextureSampler(0, m_vram_replacement_texture.get(), g_gpu_device->GetLinearSampler()); + g_gpu_device->SetPipeline(m_copy_pipeline.get()); + g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height); + g_gpu_device->Draw(3, 0); -bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const -{ - // masking enabled, oversized, or overlapping - return (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || - ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || - ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || - Common::Rectangle::FromExtents(src_x, src_y, width, height) - .Intersects(Common::Rectangle::FromExtents(dst_x, dst_y, width, height))); -} - -GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, - u32 height) const -{ - const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale, - (src_y % VRAM_HEIGHT) * m_resolution_scale, - (dst_x % VRAM_WIDTH) * m_resolution_scale, - (dst_y % VRAM_HEIGHT) * m_resolution_scale, - ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, - ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, - width * m_resolution_scale, - height * m_resolution_scale, - m_GPUSTAT.set_mask_while_drawing ? 
1u : 0u, - GetCurrentNormalizedVertexDepth()}; - - return uniforms; + RestoreGraphicsAPIState(); + return true; } void GPU_HW::IncludeVRAMDirtyRectangle(const Common::Rectangle& rect) @@ -1074,6 +1847,19 @@ void GPU_HW::IncludeVRAMDirtyRectangle(const Common::Rectangle& rect) } } +GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const +{ + if (IsInterlacedDisplayEnabled()) + { + return m_GPUSTAT.vertical_resolution ? InterlacedRenderMode::InterleavedFields : + InterlacedRenderMode::SeparateFields; + } + else + { + return InterlacedRenderMode::None; + } +} + void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices) { if (m_batch_current_vertex_ptr) @@ -1189,61 +1975,105 @@ void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) co cmd->window = m_draw_mode.texture_window; } -void GPU_HW::ReadSoftwareRendererVRAM(u32 x, u32 y, u32 width, u32 height) -{ - DebugAssert(m_sw_renderer); - m_sw_renderer->Sync(false); -} - -void GPU_HW::UpdateSoftwareRendererVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, - bool check_mask) -{ - const u32 num_words = width * height; - GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); - FillBackendCommandParameters(cmd); - cmd->params.set_mask_while_drawing = set_mask; - cmd->params.check_mask_before_draw = check_mask; - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - std::memcpy(cmd->data, data, sizeof(u16) * num_words); - m_sw_renderer->PushCommand(cmd); -} - -void GPU_HW::FillSoftwareRendererVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->x = static_cast(x); - cmd->y = static_cast(y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - cmd->color = color; - m_sw_renderer->PushCommand(cmd); -} - -void 
GPU_HW::CopySoftwareRendererVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); - FillBackendCommandParameters(cmd); - cmd->src_x = static_cast(src_x); - cmd->src_y = static_cast(src_y); - cmd->dst_x = static_cast(dst_x); - cmd->dst_y = static_cast(dst_y); - cmd->width = static_cast(width); - cmd->height = static_cast(height); - m_sw_renderer->PushCommand(cmd); -} - void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { + if (m_sw_renderer) + { + GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->x = static_cast(x); + cmd->y = static_cast(y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + cmd->color = color; + m_sw_renderer->PushCommand(cmd); + } + IncludeVRAMDirtyRectangle( Common::Rectangle::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); + + const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)); + g_gpu_device->SetPipeline( + m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get()); + + const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); + g_gpu_device->SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, + bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); + + struct VRAMFillUBOData + { + u32 u_dst_x; + u32 u_dst_y; + u32 u_end_x; + u32 u_end_y; + std::array u_fill_color; + u32 u_interlaced_displayed_field; + }; + VRAMFillUBOData uniforms; + uniforms.u_dst_x = (x % VRAM_WIDTH) * m_resolution_scale; + uniforms.u_dst_y = (y % VRAM_HEIGHT) * m_resolution_scale; + uniforms.u_end_x = ((x + width) % VRAM_WIDTH) * m_resolution_scale; + uniforms.u_end_y = ((y + height) % VRAM_HEIGHT) * m_resolution_scale; + // drop precision unless true colour is enabled + uniforms.u_fill_color = + 
GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color))); + uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + + RestoreGraphicsAPIState(); +} + +void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ + if (m_sw_renderer) + { + m_sw_renderer->Sync(false); + return; + } + + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + g_gpu_device->SetFramebuffer(m_vram_readback_framebuffer.get()); + g_gpu_device->SetPipeline(m_vram_readback_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height); + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + m_vram_readback_texture->MakeReadyForSampling(); + + // Stage the readback and copy it into our shadow buffer. 
+ g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height, + reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]), + VRAM_WIDTH * sizeof(u16)); + + RestoreGraphicsAPIState(); } void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) { + if (m_sw_renderer) + { + const u32 num_words = width * height; + GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); + FillBackendCommandParameters(cmd); + cmd->params.set_mask_while_drawing = set_mask; + cmd->params.check_mask_before_draw = check_mask; + cmd->x = static_cast(x); + cmd->y = static_cast(y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + std::memcpy(cmd->data, data, sizeof(u16) * num_words); + m_sw_renderer->PushCommand(cmd); + } + + const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); + DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT); IncludeVRAMDirtyRectangle(Common::Rectangle::FromExtents(x, y, width, height)); @@ -1252,10 +2082,129 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } + else + { + const TextureReplacementTexture* rtex = g_texture_replacements.GetVRAMWriteReplacement(width, height, data); + if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, + width * m_resolution_scale, height * m_resolution_scale)) + { + return; + } + } + + const u32 num_pixels = width * height; + void* map = m_vram_upload_buffer->Map(num_pixels); + const u32 map_index = m_vram_upload_buffer->GetCurrentPosition(); + std::memcpy(map, data, num_pixels * sizeof(u16)); + m_vram_upload_buffer->Unmap(num_pixels); + + struct VRAMWriteUBOData + { + u32 u_dst_x; + u32 u_dst_y; + u32 u_end_x; + u32 u_end_y; + u32 u_width; + u32 
u_height; + u32 u_buffer_base_offset; + u32 u_mask_or_bits; + float u_depth_value; + }; + const VRAMWriteUBOData uniforms = { + (x % VRAM_WIDTH), (y % VRAM_HEIGHT), ((x + width) % VRAM_WIDTH), ((y + height) % VRAM_HEIGHT), width, + height, map_index, (set_mask) ? 0x8000u : 0x00, GetCurrentNormalizedVertexDepth()}; + + // the viewport should already be set to the full vram, so just adjust the scissor + const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); + g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)].get()); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get()); + g_gpu_device->Draw(3, 0); + + RestoreGraphicsAPIState(); } void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { + if (m_sw_renderer) + { + GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->src_x = static_cast(src_x); + cmd->src_y = static_cast(src_y); + cmd->dst_x = static_cast(dst_x); + cmd->dst_y = static_cast(dst_y); + cmd->width = static_cast(width); + cmd->height = static_cast(height); + m_sw_renderer->PushCommand(cmd); + } + + // masking enabled, oversized, or overlapping + const bool use_shader = + (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || + Common::Rectangle::FromExtents(src_x, src_y, width, height) + .Intersects(Common::Rectangle::FromExtents(dst_x, dst_y, width, height))); + + if (use_shader || IsUsingMultisampling()) + { + const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const Common::Rectangle dst_bounds = 
GetVRAMTransferBounds(dst_x, dst_y, width, height); + if (m_vram_dirty_rect.Intersects(src_bounds)) + UpdateVRAMReadTexture(); + IncludeVRAMDirtyRectangle(dst_bounds); + + struct VRAMCopyUBOData + { + u32 u_src_x; + u32 u_src_y; + u32 u_dst_x; + u32 u_dst_y; + u32 u_end_x; + u32 u_end_y; + u32 u_width; + u32 u_height; + u32 u_set_mask_bit; + float u_depth_value; + }; + const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale, + (src_y % VRAM_HEIGHT) * m_resolution_scale, + (dst_x % VRAM_WIDTH) * m_resolution_scale, + (dst_y % VRAM_HEIGHT) * m_resolution_scale, + ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, + ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, + width * m_resolution_scale, + height * m_resolution_scale, + m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, + GetCurrentNormalizedVertexDepth()}; + + // VRAM read texture should already be bound. + const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); + g_gpu_device->SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), + dst_bounds_scaled.GetHeight()); + g_gpu_device->SetPipeline( + m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)].get()); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + RestoreGraphicsAPIState(); + + if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) + m_current_depth++; + + return; + } + + // We can't CopySubresourceRegion to the same resource. So use the shadow texture if we can, but that may need to be + // updated first. Copying to the same resource seemed to work on Windows 10, but breaks on Windows 7. But, it's + // against the API spec, so better to be safe than sorry. 
+ + // TODO: make this an optional feature, DX12 can do it + + if (m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) + UpdateVRAMReadTexture(); + IncludeVRAMDirtyRectangle( Common::Rectangle::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); @@ -1264,6 +2213,11 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } + + g_gpu_device->CopyTextureRegion(m_vram_texture.get(), dst_x * m_resolution_scale, dst_y * m_resolution_scale, 0, 0, + m_vram_read_texture.get(), src_x * m_resolution_scale, src_y * m_resolution_scale, 0, + 0, width * m_resolution_scale, height * m_resolution_scale); + m_vram_read_texture->MakeReadyForSampling(); } void GPU_HW::DispatchRenderCommand() @@ -1364,7 +2318,7 @@ void GPU_HW::DispatchRenderCommand() if (m_drawing_area_changed) { m_drawing_area_changed = false; - SetScissorFromDrawingArea(); + SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) ClearDepthBuffer(); @@ -1391,9 +2345,14 @@ void GPU_HW::FlushRender() if (vertex_count == 0) return; +#ifdef _DEBUG + GL_SCOPE("Hardware Draw %u", ++s_draw_number); +#endif + if (m_batch_ubo_dirty) { - UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); + g_gpu_device->UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); + m_renderer_stats.num_uniform_buffer_updates++; m_batch_ubo_dirty = false; } @@ -1410,6 +2369,224 @@ void GPU_HW::FlushRender() } } +void GPU_HW::UpdateDisplay() +{ + FlushRender(); + + if (g_settings.debugging.show_vram) + { + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + g_gpu_device->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, m_vram_read_texture->GetWidth(), + m_vram_read_texture->GetHeight()); + } + else + { + g_gpu_device->SetDisplayTexture(m_vram_texture.get(), 0, 0, m_vram_texture->GetWidth(), + 
m_vram_texture->GetHeight()); + } + + g_gpu_device->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + } + else + { + // TODO: use a dynamically sized texture + g_gpu_device->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, + m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, + GetDisplayAspectRatio()); + + const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; + const u32 vram_offset_x = m_crtc_state.display_vram_left; + const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; + const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; + const u32 display_width = m_crtc_state.display_vram_width; + const u32 display_height = m_crtc_state.display_vram_height; + const u32 scaled_display_width = display_width * resolution_scale; + const u32 scaled_display_height = display_height * resolution_scale; + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + + if (IsDisplayDisabled()) + { + g_gpu_device->ClearDisplayTexture(); + } + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && + (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight()) + { + + if (IsUsingDownsampling()) + { + DownsampleFramebuffer(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); + } + else + { + g_gpu_device->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, scaled_display_height); + } + } + else + { + // TODO: discard vs load for interlaced + if (interlaced == InterlacedRenderMode::None) 
+ g_gpu_device->InvalidateRenderTarget(m_display_texture.get()); + + g_gpu_device->SetFramebuffer(m_display_framebuffer.get()); + g_gpu_device->SetPipeline( + m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].get()); + g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + + const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; + const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; + const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; + const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, + reinterpret_crop_left, reinterpret_field_offset}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + + Assert(scaled_display_width <= m_display_texture->GetWidth() && + scaled_display_height <= m_display_texture->GetHeight()); + + g_gpu_device->SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); + g_gpu_device->Draw(3, 0); + + if (IsUsingDownsampling()) + DownsampleFramebuffer(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); + else + g_gpu_device->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); + + RestoreGraphicsAPIState(); + } + } +} + +void GPU_HW::DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) +{ + if (m_downsample_mode == GPUDownsampleMode::Adaptive) + DownsampleFramebufferAdaptive(source, left, top, width, height); + else + DownsampleFramebufferBoxFilter(source, left, top, width, height); +} + +void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) +{ + GL_PUSH("DownsampleFramebufferAdaptive (%u,%u => %u,%d)", left, top, left + width, left + height); + + struct SmoothingUBOData + { + float min_uv[2]; + float 
max_uv[2]; + float rcp_size[2]; + float lod; + }; + + g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height); + g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_lod_sampler.get()); + + const u32 levels = m_downsample_texture->GetLevels(); + SmoothingUBOData uniforms; + + // create mip chain + for (u32 level = 1; level < levels; level++) + { + GL_SCOPE("Create miplevel %u", level); + + const u32 level_width = width >> level; + const u32 level_height = height >> level; + const float rcp_width = 1.0f / static_cast(m_downsample_texture->GetMipWidth(level)); + const float rcp_height = 1.0f / static_cast(m_downsample_texture->GetMipHeight(level)); + uniforms.min_uv[0] = 0.0f; + uniforms.min_uv[1] = 0.0f; + uniforms.max_uv[0] = static_cast(level_width) * rcp_width; + uniforms.max_uv[1] = static_cast(level_height) * rcp_height; + uniforms.rcp_size[0] = rcp_width; + uniforms.rcp_size[1] = rcp_height; + uniforms.lod = static_cast(level - 1); + + g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get()); + g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height); + g_gpu_device->SetPipeline((level == 1) ? 
m_downsample_first_pass_pipeline.get() : + m_downsample_mid_pass_pipeline.get()); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, level, m_downsample_render_texture.get(), 0, 0, + 0, 0, level_width, level_height); + } + + // blur pass at lowest level + { + GL_SCOPE("Blur"); + + const u32 last_level = levels - 1; + const u32 last_width = width >> last_level; + const u32 last_height = height >> last_level; + const float rcp_width = 1.0f / static_cast(m_downsample_render_texture->GetWidth()); + const float rcp_height = 1.0f / static_cast(m_downsample_render_texture->GetHeight()); + uniforms.min_uv[0] = 0.0f; + uniforms.min_uv[1] = 0.0f; + uniforms.max_uv[0] = static_cast(last_width) * rcp_width; + uniforms.max_uv[1] = static_cast(last_height) * rcp_height; + uniforms.rcp_size[0] = rcp_width; + uniforms.rcp_size[1] = rcp_height; + uniforms.lod = 0.0f; + + m_downsample_render_texture->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_downsample_weight_texture.get()); + g_gpu_device->SetFramebuffer(m_downsample_weight_framebuffer.get()); + g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height); + g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + m_downsample_weight_texture->MakeReadyForSampling(); + } + + // composite downsampled and upsampled images together + { + GL_SCOPE("Composite"); + + g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get()); + g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get()); + g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), 
m_downsample_lod_sampler.get()); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get()); + g_gpu_device->Draw(3, 0); + m_downsample_render_texture->MakeReadyForSampling(); + } + + GL_POP(); + + RestoreGraphicsAPIState(); + + g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), 0, 0, width, height); +} + +void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) +{ + const u32 ds_left = left / m_resolution_scale; + const u32 ds_top = top / m_resolution_scale; + const u32 ds_width = width / m_resolution_scale; + const u32 ds_height = height / m_resolution_scale; + + source->MakeReadyForSampling(); + + g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0); + g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); + g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height); + g_gpu_device->Draw(3, 0); + + RestoreGraphicsAPIState(); + + g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), ds_left, ds_top, ds_width, ds_height); +} + void GPU_HW::DrawRendererStats(bool is_idle_frame) { if (!is_idle_frame) @@ -1488,21 +2665,11 @@ void GPU_HW::DrawRendererStats(bool is_idle_frame) } } -GPU_HW::ShaderCompileProgressTracker::ShaderCompileProgressTracker(std::string title, u32 total) - : m_title(std::move(title)), m_min_time(Common::Timer::ConvertSecondsToValue(1.0)), - m_update_interval(Common::Timer::ConvertSecondsToValue(0.1)), m_start_time(Common::Timer::GetCurrentValue()), - m_last_update_time(0), m_progress(0), m_total(total) +std::unique_ptr GPU::CreateHardwareRenderer() { -} + std::unique_ptr gpu(std::make_unique()); + if (!gpu->Initialize()) + return nullptr; -void GPU_HW::ShaderCompileProgressTracker::Increment() -{ - m_progress++; 
- - const u64 tv = Common::Timer::GetCurrentValue(); - if ((tv - m_start_time) >= m_min_time && (tv - m_last_update_time) >= m_update_interval) - { - Host::DisplayLoadingScreen(m_title.c_str(), 0, static_cast(m_total), static_cast(m_progress)); - m_last_update_time = tv; - } + return gpu; } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index f0392ffde..d8b925e48 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -2,9 +2,15 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "common/heap_array.h" + #include "gpu.h" -#include "util/host_display.h" +#include "texture_replacements.h" + +#include "util/gpu_device.h" + +#include "common/dimensional_array.h" +#include "common/heap_array.h" + #include #include #include @@ -15,7 +21,7 @@ class GPU_SW_Backend; struct GPUBackendCommand; struct GPUBackendDrawCommand; -class GPU_HW : public GPU +class GPU_HW final : public GPU { public: enum class BatchRenderMode : u8 @@ -34,24 +40,26 @@ public: }; GPU_HW(); - virtual ~GPU_HW(); + ~GPU_HW() override; const Threading::Thread* GetSWThread() const override; + bool IsHardwareRenderer() const override; - virtual bool Initialize() override; - virtual void Reset(bool clear_vram) override; - virtual bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + bool Initialize() override; + void Reset(bool clear_vram) override; + bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + void RestoreGraphicsAPIState() override; + + void UpdateSettings() override; void UpdateResolutionScale() override final; std::tuple GetEffectiveDisplayResolution(bool scaled = true) override final; std::tuple GetFullDisplayResolution(bool scaled = true) override final; -protected: +private: enum : u32 { VRAM_UPDATE_TEXTURE_BUFFER_SIZE = 4 * 1024 * 1024, - VERTEX_BUFFER_SIZE = 4 * 1024 * 1024, - UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024, MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2, MAX_VERTICES_FOR_RECTANGLE 
= 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) * (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u) @@ -129,43 +137,6 @@ protected: u32 u_set_mask_while_drawing; }; - struct VRAMFillUBOData - { - u32 u_dst_x; - u32 u_dst_y; - u32 u_end_x; - u32 u_end_y; - float u_fill_color[4]; - u32 u_interlaced_displayed_field; - }; - - struct VRAMWriteUBOData - { - u32 u_dst_x; - u32 u_dst_y; - u32 u_end_x; - u32 u_end_y; - u32 u_width; - u32 u_height; - u32 u_buffer_base_offset; - u32 u_mask_or_bits; - float u_depth_value; - }; - - struct VRAMCopyUBOData - { - u32 u_src_x; - u32 u_src_y; - u32 u_dst_x; - u32 u_dst_y; - u32 u_end_x; - u32 u_end_y; - u32 u_width; - u32 u_height; - u32 u_set_mask_bit; - float u_depth_value; - }; - struct RendererStats { u32 num_batches; @@ -173,63 +144,42 @@ protected: u32 num_uniform_buffer_updates; }; - class ShaderCompileProgressTracker - { - public: - ShaderCompileProgressTracker(std::string title, u32 total); + bool CreateBuffers(); + void ClearFramebuffer(); + void DestroyBuffers(); - void Increment(); + bool CompilePipelines(); + void DestroyPipelines(); - private: - std::string m_title; - u64 m_min_time; - u64 m_update_interval; - u64 m_start_time; - u64 m_last_update_time; - u32 m_progress; - u32 m_total; - }; - - static constexpr std::tuple RGBA8ToFloat(u32 rgba) - { - return std::make_tuple(static_cast(rgba & UINT32_C(0xFF)) * (1.0f / 255.0f), - static_cast((rgba >> 8) & UINT32_C(0xFF)) * (1.0f / 255.0f), - static_cast((rgba >> 16) & UINT32_C(0xFF)) * (1.0f / 255.0f), - static_cast(rgba >> 24) * (1.0f / 255.0f)); - } - - void UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed); - - virtual void UpdateVRAMReadTexture(); - virtual void UpdateDepthBufferFromMaskBit() = 0; - virtual void ClearDepthBuffer() = 0; - virtual void SetScissorFromDrawingArea() = 0; - virtual void MapBatchVertexPointer(u32 required_vertices) = 0; - virtual void 
UnmapBatchVertexPointer(u32 used_vertices) = 0; - virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0; - virtual void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) = 0; + void UpdateVRAMReadTexture(); + void UpdateDepthBufferFromMaskBit(); + void ClearDepthBuffer(); + void SetScissor(); + void MapBatchVertexPointer(u32 required_vertices); + void UnmapBatchVertexPointer(u32 used_vertices); + void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices); + void ClearDisplay() override; + void UpdateDisplay() override; u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; - ALWAYS_INLINE bool IsUsingMultisampling() const { return m_multisamples > 1; } - ALWAYS_INLINE bool IsUsingDownsampling() const - { - return (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24); - } + bool IsUsingMultisampling() const; + bool IsUsingDownsampling() const; - void SetFullVRAMDirtyRectangle() - { - m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - m_draw_mode.SetTexturePageChanged(); - } - void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); } + void SetFullVRAMDirtyRectangle(); + void ClearVRAMDirtyRectangle(); void IncludeVRAMDirtyRectangle(const Common::Rectangle& rect); - bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; } - - u32 GetBatchVertexSpace() const { return static_cast(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); } - u32 GetBatchVertexCount() const { return static_cast(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); } + ALWAYS_INLINE bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; } + ALWAYS_INLINE u32 GetBatchVertexSpace() const + { + return static_cast(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); + } + ALWAYS_INLINE u32 GetBatchVertexCount() const + { + return 
static_cast(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); + } void EnsureVertexBufferSpace(u32 required_vertices); void EnsureVertexBufferSpaceForCurrentCommand(); void ResetBatchVertexDepth(); @@ -241,91 +191,31 @@ protected: } /// Returns the interlaced mode to use when scanning out/displaying. - ALWAYS_INLINE InterlacedRenderMode GetInterlacedRenderMode() const - { - if (IsInterlacedDisplayEnabled()) - { - return m_GPUSTAT.vertical_resolution ? InterlacedRenderMode::InterleavedFields : - InterlacedRenderMode::SeparateFields; - } - else - { - return InterlacedRenderMode::None; - } - } - - /// Returns true if the specified texture filtering mode requires dual-source blending. - ALWAYS_INLINE bool TextureFilterRequiresDualSourceBlend(GPUTextureFilter filter) - { - return (filter == GPUTextureFilter::Bilinear || filter == GPUTextureFilter::JINC2 || - filter == GPUTextureFilter::xBR); - } - - /// Returns true if alpha blending should be enabled for drawing the current batch. - ALWAYS_INLINE bool UseAlphaBlending(GPUTransparencyMode transparency_mode, BatchRenderMode render_mode) const - { - if (m_texture_filtering == GPUTextureFilter::Bilinear || m_texture_filtering == GPUTextureFilter::JINC2 || - m_texture_filtering == GPUTextureFilter::xBR) - { - return true; - } - - if (transparency_mode == GPUTransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque) - return false; - - return true; - } + InterlacedRenderMode GetInterlacedRenderMode() const; /// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled /// on a per-pixel basis, and the opaque pixels shouldn't be blended at all. ALWAYS_INLINE bool NeedsTwoPassRendering() const { + // TODO: see if there's a better way we can do this. definitely can with fbfetch. 
return (m_batch.texture_mode != GPUTextureMode::Disabled && (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground || (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); } - /// Returns true if the specified VRAM fill is oversized. - ALWAYS_INLINE static bool IsVRAMFillOversized(u32 x, u32 y, u32 width, u32 height) - { - return ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT); - } - - ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() { return static_cast(m_sw_renderer); } - void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; void UpdateSoftwareRenderer(bool copy_vram_from_hw); - void ReadSoftwareRendererVRAM(u32 x, u32 y, u32 width, u32 height); - void UpdateSoftwareRendererVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, - bool check_mask); - void FillSoftwareRendererVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); - void CopySoftwareRendererVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void DispatchRenderCommand() override; void FlushRender() override; void DrawRendererStats(bool is_idle_frame) override; - void CalcScissorRect(int* left, int* top, int* right, int* bottom); - - std::tuple ScaleVRAMCoordinates(s32 x, s32 y) const - { - return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); - } - - /// Computes the area affected by a VRAM transfer, including wrap-around of X. 
- Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) const; - - /// Returns true if the VRAM copy shader should be used (oversized copies, masking). - bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; - - VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const; - VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset, bool set_mask, - bool check_mask) const; - VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; + bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); /// Expands a line into two triangles. void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth); @@ -340,22 +230,31 @@ protected: void SetBatchDepthBuffer(bool enabled); void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); - /// UBO data for adaptive smoothing. - struct SmoothingUBOData - { - float min_uv[2]; - float max_uv[2]; - float rcp_size[2]; - }; - /// Returns the number of mipmap levels used for adaptive smoothing. u32 GetAdaptiveDownsamplingMipLevels() const; - /// Returns the UBO data for an adaptive smoothing pass. 
- SmoothingUBOData GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width, - u32 tex_height) const; + void DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); + void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); + void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); + + std::unique_ptr m_vram_texture; + std::unique_ptr m_vram_depth_texture; + std::unique_ptr m_vram_depth_view; + std::unique_ptr m_vram_read_texture; + std::unique_ptr m_vram_readback_texture; + std::unique_ptr m_vram_replacement_texture; + std::unique_ptr m_display_texture; + + std::unique_ptr m_vram_framebuffer; + std::unique_ptr m_vram_update_depth_framebuffer; + std::unique_ptr m_vram_readback_framebuffer; + std::unique_ptr m_display_framebuffer; + + std::unique_ptr m_vram_upload_buffer; + std::unique_ptr m_vram_write_texture; FixedHeapArray m_vram_shadow; + std::unique_ptr m_sw_renderer; BatchVertex* m_batch_start_vertex_ptr = nullptr; @@ -368,20 +267,17 @@ protected: u32 m_resolution_scale = 1; u32 m_multisamples = 1; u32 m_max_resolution_scale = 1; - u32 m_max_multisamples = 1; - RenderAPI m_render_api = RenderAPI::None; bool m_true_color = true; union { BitField m_supports_per_sample_shading; BitField m_supports_dual_source_blend; - BitField m_supports_adaptive_downsampling; - BitField m_supports_disable_color_perspective; - BitField m_per_sample_shading; - BitField m_scaled_dithering; - BitField m_chroma_smoothing; - BitField m_disable_color_perspective; + BitField m_supports_disable_color_perspective; + BitField m_per_sample_shading; + BitField m_scaled_dithering; + BitField m_chroma_smoothing; + BitField m_disable_color_perspective; u8 bits = 0; }; @@ -397,20 +293,45 @@ protected: // Bounding box of VRAM area that the GPU has drawn into. 
Common::Rectangle m_vram_dirty_rect; + // Changed state + bool m_batch_ubo_dirty = true; + + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + DimensionalArray, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; + + // [wrapped][interlaced] + DimensionalArray, 2, 2> m_vram_fill_pipelines{}; + + // [depth_test] + std::array, 2> m_vram_write_pipelines{}; + std::array, 2> m_vram_copy_pipelines{}; + + std::unique_ptr m_vram_readback_pipeline; + std::unique_ptr m_vram_update_depth_pipeline; + + // [depth_24][interlace_mode] + DimensionalArray, 3, 2> m_display_pipelines{}; + + // TODO: get rid of this, and use image blits instead where supported + std::unique_ptr m_copy_pipeline; + + std::unique_ptr m_downsample_texture; + std::unique_ptr m_downsample_render_texture; + std::unique_ptr m_downsample_framebuffer; + std::unique_ptr m_downsample_weight_texture; + std::unique_ptr m_downsample_weight_framebuffer; + std::unique_ptr m_downsample_first_pass_pipeline; + std::unique_ptr m_downsample_mid_pass_pipeline; + std::unique_ptr m_downsample_blur_pass_pipeline; + std::unique_ptr m_downsample_composite_pass_pipeline; + std::unique_ptr m_downsample_lod_sampler; + std::unique_ptr m_downsample_composite_sampler; + // Statistics RendererStats m_renderer_stats = {}; RendererStats m_last_renderer_stats = {}; - // Changed state - bool m_batch_ubo_dirty = true; - private: - enum : u32 - { - MIN_BATCH_VERTEX_COUNT = 6, - MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex) - }; - void LoadVertices(); ALWAYS_INLINE void AddVertex(const BatchVertex& v) diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp deleted file mode 100644 index d84883051..000000000 --- a/src/core/gpu_hw_d3d11.cpp +++ /dev/null @@ -1,1214 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "gpu_hw_d3d11.h" -#include "common/assert.h" -#include "common/d3d11/shader_compiler.h" 
-#include "common/log.h" -#include "common/timer.h" -#include "gpu_hw_shadergen.h" -#include "gpu_sw_backend.h" -#include "util/host_display.h" -#include "shader_cache_version.h" -#include "system.h" -#include "util/state_wrapper.h" -Log_SetChannel(GPU_HW_D3D11); - -GPU_HW_D3D11::GPU_HW_D3D11(ID3D11Device* device, ID3D11DeviceContext* context) : m_device(device), m_context(context) {} - -GPU_HW_D3D11::~GPU_HW_D3D11() -{ - g_host_display->ClearDisplayTexture(); - - DestroyShaders(); - DestroyStateObjects(); -} - -GPURenderer GPU_HW_D3D11::GetRendererType() const -{ - return GPURenderer::HardwareD3D11; -} - -bool GPU_HW_D3D11::Initialize() -{ - SetCapabilities(); - - if (!GPU_HW::Initialize()) - return false; - - if (!CreateFramebuffer()) - { - Log_ErrorPrintf("Failed to create framebuffer"); - return false; - } - - if (!CreateVertexBuffer()) - { - Log_ErrorPrintf("Failed to create vertex buffer"); - return false; - } - - if (!CreateUniformBuffer()) - { - Log_ErrorPrintf("Failed to create uniform buffer"); - return false; - } - - if (!CreateTextureBuffer()) - { - Log_ErrorPrintf("Failed to create texture buffer"); - return false; - } - - if (!CreateStateObjects()) - { - Log_ErrorPrintf("Failed to create state objects"); - return false; - } - - if (!CompileShaders()) - { - Log_ErrorPrintf("Failed to compile shaders"); - return false; - } - - RestoreGraphicsAPIState(); - return true; -} - -void GPU_HW_D3D11::Reset(bool clear_vram) -{ - GPU_HW::Reset(clear_vram); - - if (clear_vram) - ClearFramebuffer(); -} - -bool GPU_HW_D3D11::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) -{ - if (host_texture) - { - ComPtr resource; - - D3D11::Texture* tex = static_cast(*host_texture); - if (sw.IsReading()) - { - if (tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != m_vram_texture.GetSamples()) - { - return false; - } - - m_context->CopySubresourceRegion(m_vram_texture.GetD3DTexture(), 
0, 0, 0, 0, tex->GetD3DTexture(), 0, nullptr); - } - else - { - if (!tex || tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != m_vram_texture.GetSamples()) - { - delete tex; - - tex = static_cast(g_host_display - ->CreateTexture(m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 1, - 1, m_vram_texture.GetSamples(), GPUTexture::Format::RGBA8, - nullptr, 0, false) - .release()); - *host_texture = tex; - if (!tex) - return false; - } - - m_context->CopySubresourceRegion(tex->GetD3DTexture(), 0, 0, 0, 0, m_vram_texture.GetD3DTexture(), 0, nullptr); - } - } - - return GPU_HW::DoState(sw, host_texture, update_display); -} - -void GPU_HW_D3D11::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); - - m_context->GSSetShader(nullptr, nullptr, 0); - - // In D3D11 we can't leave a buffer mapped across a Present() call. - FlushRender(); -} - -void GPU_HW_D3D11::RestoreGraphicsAPIState() -{ - const UINT stride = sizeof(BatchVertex); - const UINT offset = 0; - m_context->IASetVertexBuffers(0, 1, m_vertex_stream_buffer.GetD3DBufferArray(), &stride, &offset); - m_context->IASetInputLayout(m_batch_input_layout.Get()); - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->GSSetShader(nullptr, nullptr, 0); - m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); - m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get()); - m_context->RSSetState(m_cull_none_rasterizer_state.Get()); - SetViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - SetScissorFromDrawingArea(); - m_batch_ubo_dirty = true; -} - -void GPU_HW_D3D11::UpdateSettings() -{ - GPU_HW::UpdateSettings(); - - bool framebuffer_changed, shaders_changed; - UpdateHWSettings(&framebuffer_changed, &shaders_changed); - - if (framebuffer_changed) - { - 
RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); - g_host_display->ClearDisplayTexture(); - CreateFramebuffer(); - } - - if (shaders_changed) - { - DestroyShaders(); - DestroyStateObjects(); - CreateStateObjects(); - CompileShaders(); - } - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); - UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); - ResetGraphicsAPIState(); - } -} - -void GPU_HW_D3D11::MapBatchVertexPointer(u32 required_vertices) -{ - DebugAssert(!m_batch_start_vertex_ptr); - - const D3D11::StreamBuffer::MappingResult res = - m_vertex_stream_buffer.Map(m_context.Get(), sizeof(BatchVertex), required_vertices * sizeof(BatchVertex)); - - m_batch_start_vertex_ptr = static_cast(res.pointer); - m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned; - m_batch_base_vertex = res.index_aligned; -} - -void GPU_HW_D3D11::UnmapBatchVertexPointer(u32 used_vertices) -{ - DebugAssert(m_batch_start_vertex_ptr); - m_vertex_stream_buffer.Unmap(m_context.Get(), used_vertices * sizeof(BatchVertex)); - m_batch_start_vertex_ptr = nullptr; - m_batch_end_vertex_ptr = nullptr; - m_batch_current_vertex_ptr = nullptr; -} - -void GPU_HW_D3D11::SetCapabilities() -{ - const u32 max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; - const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; - - m_max_resolution_scale = max_texture_scale; - m_supports_dual_source_blend = true; - m_supports_per_sample_shading = (m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_10_1); - m_supports_adaptive_downsampling = true; - m_supports_disable_color_perspective = true; - - m_max_multisamples = 1; - for (u32 multisamples = 2; multisamples < D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) - { - UINT num_quality_levels; - if (SUCCEEDED( - 
m_device->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, multisamples, &num_quality_levels)) && - num_quality_levels > 0) - { - m_max_multisamples = multisamples; - } - } -} - -bool GPU_HW_D3D11::CreateFramebuffer() -{ - DestroyFramebuffer(); - - // scale vram size to internal resolution - const u32 texture_width = VRAM_WIDTH * m_resolution_scale; - const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; - const u8 samples = static_cast(m_multisamples); - const GPUTexture::Format texture_format = GPUTexture::Format::RGBA8; - const GPUTexture::Format depth_format = GPUTexture::Format::D16; - - if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, samples, texture_format, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || - !m_vram_depth_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, samples, depth_format, - D3D11_BIND_DEPTH_STENCIL) || - !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, 1, 1, 1, texture_format, - D3D11_BIND_SHADER_RESOURCE) || - !m_display_texture.Create( - m_device.Get(), - ((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) * m_resolution_scale, - GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, texture_format, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || - !m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, texture_format, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) - { - return false; - } - - const CD3D11_DEPTH_STENCIL_VIEW_DESC depth_view_desc(samples > 1 ? 
D3D11_DSV_DIMENSION_TEXTURE2DMS : - D3D11_DSV_DIMENSION_TEXTURE2D, - D3D11::Texture::GetDXGIFormat(depth_format)); - HRESULT hr = - m_device->CreateDepthStencilView(m_vram_depth_texture, &depth_view_desc, m_vram_depth_view.GetAddressOf()); - if (FAILED(hr)) - return false; - - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - { - const u32 levels = GetAdaptiveDownsamplingMipLevels(); - - if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, 1, static_cast(levels), 1, - texture_format, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET) || - !m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1), - texture_height >> (levels - 1), 1, 1, 1, GPUTexture::Format::R8, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) - { - return false; - } - - m_downsample_mip_views.resize(levels); - for (u32 i = 0; i < levels; i++) - { - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D, - m_downsample_texture.GetDXGIFormat(), i, 1); - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, - m_downsample_texture.GetDXGIFormat(), i, 1); - - hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc, - m_downsample_mip_views[i].first.GetAddressOf()); - if (FAILED(hr)) - return false; - - hr = m_device->CreateRenderTargetView(m_downsample_texture, &rtv_desc, - m_downsample_mip_views[i].second.GetAddressOf()); - if (FAILED(hr)) - return false; - } - } - else if (m_downsample_mode == GPUDownsampleMode::Box) - { - if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, texture_format, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET)) - { - return false; - } - } - - m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr); - SetFullVRAMDirtyRectangle(); - return true; -} - -void GPU_HW_D3D11::ClearFramebuffer() -{ - static constexpr std::array color = {}; - 
m_context->ClearRenderTargetView(m_vram_texture.GetD3DRTV(), color.data()); - m_context->ClearDepthStencilView(m_vram_depth_view.Get(), D3D11_CLEAR_DEPTH, m_pgxp_depth_buffer ? 1.0f : 0.0f, 0); - m_context->ClearRenderTargetView(m_display_texture, color.data()); - SetFullVRAMDirtyRectangle(); - m_last_depth_z = 1.0f; -} - -void GPU_HW_D3D11::DestroyFramebuffer() -{ - m_downsample_mip_views.clear(); - m_downsample_weight_texture.Destroy(); - m_downsample_texture.Destroy(); - - m_vram_read_texture.Destroy(); - m_vram_depth_view.Reset(); - m_vram_depth_texture.Destroy(); - m_vram_texture.Destroy(); - m_vram_encoding_texture.Destroy(); - m_display_texture.Destroy(); -} - -bool GPU_HW_D3D11::CreateVertexBuffer() -{ - return m_vertex_stream_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE); -} - -bool GPU_HW_D3D11::CreateUniformBuffer() -{ - return m_uniform_stream_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, MAX_UNIFORM_BUFFER_SIZE); -} - -bool GPU_HW_D3D11::CreateTextureBuffer() -{ - if (!m_texture_stream_buffer.Create(m_device.Get(), D3D11_BIND_SHADER_RESOURCE, VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) - return false; - - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_R16_UINT, 0, - VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16)); - const HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc, - m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Creation of texture buffer SRV failed: 0x%08X", hr); - return false; - } - - return true; -} - -bool GPU_HW_D3D11::CreateStateObjects() -{ - HRESULT hr; - - CD3D11_RASTERIZER_DESC rs_desc = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rs_desc.CullMode = D3D11_CULL_NONE; - rs_desc.ScissorEnable = TRUE; - rs_desc.MultisampleEnable = IsUsingMultisampling(); - rs_desc.DepthClipEnable = FALSE; - hr = m_device->CreateRasterizerState(&rs_desc, 
m_cull_none_rasterizer_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - if (IsUsingMultisampling()) - { - rs_desc.MultisampleEnable = FALSE; - hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state_no_msaa.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - } - else - { - m_cull_none_rasterizer_state_no_msaa = m_cull_none_rasterizer_state; - } - - CD3D11_DEPTH_STENCIL_DESC ds_desc = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - ds_desc.DepthEnable = FALSE; - ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_disabled_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - ds_desc.DepthEnable = TRUE; - ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; - ds_desc.DepthFunc = D3D11_COMPARISON_ALWAYS; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_always_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - ds_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_greater_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - bl_desc.RenderTarget[0].RenderTargetWriteMask = 0; - hr = m_device->CreateBlendState(&bl_desc, m_blend_no_color_writes_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; - sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; - hr = 
m_device->CreateSamplerState(&sampler_desc, m_point_sampler_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; - hr = m_device->CreateSamplerState(&sampler_desc, m_linear_sampler_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - hr = m_device->CreateSamplerState(&sampler_desc, m_trilinear_sampler_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) - { - bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - if (transparency_mode != static_cast(GPUTransparencyMode::Disabled) || - m_texture_filtering != GPUTextureFilter::Nearest) - { - bl_desc.RenderTarget[0].BlendEnable = TRUE; - bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; - bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA; - bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; - bl_desc.RenderTarget[0].BlendOp = - (transparency_mode == static_cast(GPUTransparencyMode::BackgroundMinusForeground)) ? 
- D3D11_BLEND_OP_REV_SUBTRACT : - D3D11_BLEND_OP_ADD; - bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - } - - hr = m_device->CreateBlendState(&bl_desc, m_batch_blend_states[transparency_mode].ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - } - - return true; -} - -void GPU_HW_D3D11::DestroyStateObjects() -{ - m_batch_blend_states = {}; - m_linear_sampler_state.Reset(); - m_point_sampler_state.Reset(); - m_trilinear_sampler_state.Reset(); - m_blend_no_color_writes_state.Reset(); - m_blend_disabled_state.Reset(); - m_depth_test_greater_state.Reset(); - m_depth_test_less_state.Reset(); - m_depth_test_always_state.Reset(); - m_depth_disabled_state.Reset(); - m_cull_none_rasterizer_state.Reset(); - m_cull_none_rasterizer_state_no_msaa.Reset(); -} - -bool GPU_HW_D3D11::CompileShaders() -{ - D3D11::ShaderCache shader_cache; - shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION, - g_settings.gpu_use_debug_device); - - GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); - - ShaderCompileProgressTracker progress("Compiling Shaders", - 1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + (2 * 3) + 1); - - // input layout - { - static constexpr std::array attributes = { - {{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}}; - - // 
we need a vertex shader... - ComPtr vs_bytecode = - shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true)); - if (!vs_bytecode) - return false; - - const UINT num_attributes = static_cast(attributes.size()) - (m_using_uv_limits ? 0 : 1); - const HRESULT hr = - m_device->CreateInputLayout(attributes.data(), num_attributes, vs_bytecode->GetBufferPointer(), - vs_bytecode->GetBufferSize(), m_batch_input_layout.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateInputLayout failed: 0x%08X", hr); - return false; - } - } - - progress.Increment(); - - m_screen_quad_vertex_shader = - shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateScreenQuadVertexShader()); - m_uv_quad_vertex_shader = shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateUVQuadVertexShader()); - if (!m_screen_quad_vertex_shader || !m_uv_quad_vertex_shader) - return false; - - progress.Increment(); - - for (u8 textured = 0; textured < 2; textured++) - { - const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); - m_batch_vertex_shaders[textured] = shader_cache.GetVertexShader(m_device.Get(), vs); - if (!m_batch_vertex_shaders[textured]) - return false; - - progress.Increment(); - } - - for (u8 render_mode = 0; render_mode < 4; render_mode++) - { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - const std::string ps = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); - - m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] = - shader_cache.GetPixelShader(m_device.Get(), ps); - if (!m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing]) - return false; - - progress.Increment(); - } - } - 
} - } - - m_copy_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateCopyFragmentShader()); - if (!m_copy_pixel_shader) - return false; - - progress.Increment(); - - for (u8 wrapped = 0; wrapped < 2; wrapped++) - { - for (u8 interlaced = 0; interlaced < 2; interlaced++) - { - const std::string ps = - shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)); - m_vram_fill_pixel_shaders[wrapped][interlaced] = shader_cache.GetPixelShader(m_device.Get(), ps); - if (!m_vram_fill_pixel_shaders[wrapped][interlaced]) - return false; - - progress.Increment(); - } - } - - m_vram_read_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader()); - if (!m_vram_read_pixel_shader) - return false; - - progress.Increment(); - - m_vram_write_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(false)); - if (!m_vram_write_pixel_shader) - return false; - - progress.Increment(); - - m_vram_copy_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMCopyFragmentShader()); - if (!m_vram_copy_pixel_shader) - return false; - - progress.Increment(); - - m_vram_update_depth_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMUpdateDepthFragmentShader()); - if (!m_vram_update_depth_pixel_shader) - return false; - - progress.Increment(); - - for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) - { - for (u8 interlacing = 0; interlacing < 3; interlacing++) - { - const std::string ps = shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24bit), static_cast(interlacing), - ConvertToBoolUnchecked(depth_24bit) && m_chroma_smoothing); - m_display_pixel_shaders[depth_24bit][interlacing] = shader_cache.GetPixelShader(m_device.Get(), ps); - if (!m_display_pixel_shaders[depth_24bit][interlacing]) - return false; - - progress.Increment(); - } - } - - if 
(m_downsample_mode == GPUDownsampleMode::Adaptive) - { - m_downsample_first_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); - m_downsample_mid_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); - m_downsample_blur_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); - m_downsample_composite_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); - - if (!m_downsample_first_pass_pixel_shader || !m_downsample_mid_pass_pixel_shader || - !m_downsample_blur_pass_pixel_shader || !m_downsample_composite_pixel_shader) - { - return false; - } - } - else if (m_downsample_mode == GPUDownsampleMode::Box) - { - m_downsample_first_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateBoxSampleDownsampleFragmentShader()); - if (!m_downsample_first_pass_pixel_shader) - return false; - } - - progress.Increment(); - -#undef UPDATE_PROGRESS - - return true; -} - -void GPU_HW_D3D11::DestroyShaders() -{ - m_downsample_composite_pixel_shader.Reset(); - m_downsample_blur_pass_pixel_shader.Reset(); - m_downsample_mid_pass_pixel_shader.Reset(); - m_downsample_first_pass_pixel_shader.Reset(); - m_display_pixel_shaders = {}; - m_vram_update_depth_pixel_shader.Reset(); - m_vram_copy_pixel_shader.Reset(); - m_vram_write_pixel_shader.Reset(); - m_vram_read_pixel_shader.Reset(); - m_vram_fill_pixel_shaders = {}; - m_copy_pixel_shader.Reset(); - m_uv_quad_vertex_shader.Reset(); - m_screen_quad_vertex_shader.Reset(); - m_batch_pixel_shaders = {}; - m_batch_vertex_shaders = {}; - m_batch_input_layout.Reset(); -} - -void GPU_HW_D3D11::UploadUniformBuffer(const void* data, u32 data_size) -{ - Assert(data_size <= MAX_UNIFORM_BUFFER_SIZE); - - const auto res = 
m_uniform_stream_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, data_size); - std::memcpy(res.pointer, data, data_size); - m_uniform_stream_buffer.Unmap(m_context.Get(), data_size); - - m_context->VSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray()); - - m_renderer_stats.num_uniform_buffer_updates++; -} - -void GPU_HW_D3D11::SetViewport(u32 x, u32 y, u32 width, u32 height) -{ - const CD3D11_VIEWPORT vp(static_cast(x), static_cast(y), static_cast(width), - static_cast(height)); - m_context->RSSetViewports(1, &vp); -} - -void GPU_HW_D3D11::SetScissor(u32 x, u32 y, u32 width, u32 height) -{ - const CD3D11_RECT rc(x, y, x + width, y + height); - m_context->RSSetScissorRects(1, &rc); -} - -void GPU_HW_D3D11::SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height) -{ - SetViewport(x, y, width, height); - SetScissor(x, y, width, height); -} - -void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* uniforms, u32 uniforms_size) -{ - if (uniforms) - { - UploadUniformBuffer(uniforms, uniforms_size); - m_batch_ubo_dirty = true; - } - - m_context->VSSetShader(m_screen_quad_vertex_shader.Get(), nullptr, 0); - m_context->GSSetShader(nullptr, nullptr, 0); - m_context->PSSetShader(shader, nullptr, 0); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->Draw(3, 0); -} - -bool GPU_HW_D3D11::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height) -{ - if (m_vram_replacement_texture.GetWidth() < tex->GetWidth() || - m_vram_replacement_texture.GetHeight() < tex->GetHeight()) - { - if (!m_vram_replacement_texture.Create(m_device.Get(), tex->GetWidth(), tex->GetHeight(), 1, 1, 1, - GPUTexture::Format::RGBA8, D3D11_BIND_SHADER_RESOURCE, tex->GetPixels(), - tex->GetPitch(), true)) - { - return false; - } - } - else - { - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT 
hr = m_context->Map(m_vram_replacement_texture, 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Texture map failed: %08X", hr); - return false; - } - - const u32 copy_size = std::min(tex->GetPitch(), sr.RowPitch); - const u8* src_ptr = reinterpret_cast(tex->GetPixels()); - u8* dst_ptr = static_cast(sr.pData); - for (u32 i = 0; i < tex->GetHeight(); i++) - { - std::memcpy(dst_ptr, src_ptr, copy_size); - src_ptr += tex->GetPitch(); - dst_ptr += sr.RowPitch; - } - - m_context->Unmap(m_vram_replacement_texture, 0); - } - - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_vram_replacement_texture.GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_linear_sampler_state.GetAddressOf()); - SetViewportAndScissor(dst_x, dst_y, width, height); - - const float uniforms[] = {0.0f, 0.0f, 1.0f, 1.0f}; - DrawUtilityShader(m_copy_pixel_shader.Get(), uniforms, sizeof(uniforms)); - RestoreGraphicsAPIState(); - return true; -} - -void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) -{ - const bool textured = (m_batch.texture_mode != GPUTextureMode::Disabled); - - m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0); - - m_context->PSSetShader(m_batch_pixel_shaders[static_cast(render_mode)][static_cast(m_batch.texture_mode)] - [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] - .Get(), - nullptr, 0); - - const GPUTransparencyMode transparency_mode = - (render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode; - m_context->OMSetBlendState(m_batch_blend_states[static_cast(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu); - - m_context->OMSetDepthStencilState( - (m_batch.use_depth_buffer ? - m_depth_test_less_state.Get() : - (m_batch.check_mask_before_draw ? 
m_depth_test_greater_state.Get() : m_depth_test_always_state.Get())), - 0); - - m_context->Draw(num_vertices, base_vertex); -} - -void GPU_HW_D3D11::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - - CD3D11_RECT rc(left, top, right, bottom); - m_context->RSSetScissorRects(1, &rc); -} - -void GPU_HW_D3D11::ClearDisplay() -{ - GPU_HW::ClearDisplay(); - - g_host_display->ClearDisplayTexture(); - - static constexpr std::array clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; - m_context->ClearRenderTargetView(m_display_texture.GetD3DRTV(), clear_color.data()); -} - -void GPU_HW_D3D11::UpdateDisplay() -{ - GPU_HW::UpdateDisplay(); - - if (g_settings.debugging.show_vram) - { - if (IsUsingMultisampling()) - { - UpdateVRAMReadTexture(); - g_host_display->SetDisplayTexture(&m_vram_read_texture, 0, 0, m_vram_read_texture.GetWidth(), - m_vram_read_texture.GetHeight()); - } - else - { - g_host_display->SetDisplayTexture(&m_vram_texture, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - } - - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); - } - else - { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - GetDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 
1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) - { - g_host_display->ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) - { - - if (IsUsingDownsampling()) - { - DownsampleFramebuffer(m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); - } - else - { - g_host_display->SetDisplayTexture(&m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); - } - } - else - { - m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); - m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); - - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? 
GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - ID3D11PixelShader* display_pixel_shader = - m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Get(); - - Assert(scaled_display_width <= m_display_texture.GetWidth() && - scaled_display_height <= m_display_texture.GetHeight()); - - SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); - DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); - - if (IsUsingDownsampling()) - DownsampleFramebuffer(m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - else - g_host_display->SetDisplayTexture(&m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - - RestoreGraphicsAPIState(); - } - } -} - -void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - { - ReadSoftwareRendererVRAM(x, y, width, height); - return; - } - - // Get bounds with wrap-around handled. - const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); - - // Encode the 24-bit texture as 16-bit. 
- const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); - m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); - SetViewportAndScissor(0, 0, encoded_width, encoded_height); - DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms)); - - // Stage the readback and copy it into our shadow buffer. - g_host_display->DownloadTexture(&m_vram_encoding_texture, 0, 0, encoded_width, encoded_height, - reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]), - VRAM_WIDTH * sizeof(u16)); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - if (IsUsingSoftwareRendererForReadbacks()) - FillSoftwareRendererVRAM(x, y, width, height, color); - - GPU_HW::FillVRAM(x, y, width, height, color); - - m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); - - const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); - SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, - bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); - - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); - DrawUtilityShader(m_vram_fill_pixel_shaders[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] - [BoolToUInt8(IsInterlacedRenderingEnabled())] - .Get(), - &uniforms, sizeof(uniforms)); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - if (IsUsingSoftwareRendererForReadbacks()) - UpdateSoftwareRendererVRAM(x, y, width, height, data, set_mask, check_mask); - - const Common::Rectangle 
bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, set_mask, check_mask); - - if (!check_mask) - { - const TextureReplacementTexture* rtex = g_texture_replacements.GetVRAMWriteReplacement(width, height, data); - if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, - width * m_resolution_scale, height * m_resolution_scale)) - { - return; - } - } - - const u32 num_pixels = width * height; - const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16)); - std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); - m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16)); - - const VRAMWriteUBOData uniforms = - GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask); - m_context->OMSetDepthStencilState( - (check_mask && !m_pgxp_depth_buffer) ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); - - // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; - SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); - - DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms)); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); - - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) - { - const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const Common::Rectangle dst_bounds = 
GetVRAMTransferBounds(dst_x, dst_y, width, height); - if (m_vram_dirty_rect.Intersects(src_bounds)) - UpdateVRAMReadTexture(); - IncludeVRAMDirtyRectangle(dst_bounds); - - const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); - - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); - SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), - dst_bounds_scaled.GetHeight()); - m_context->OMSetDepthStencilState((m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) ? - m_depth_test_greater_state.Get() : - m_depth_test_always_state.Get(), - 0); - m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray()); - DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms)); - RestoreGraphicsAPIState(); - - if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) - m_current_depth++; - - return; - } - - // We can't CopySubresourceRegion to the same resource. So use the shadow texture if we can, but that may need to be - // updated first. Copying to the same resource seemed to work on Windows 10, but breaks on Windows 7. But, it's - // against the API spec, so better to be safe than sorry. 
- if (m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) - UpdateVRAMReadTexture(); - - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); - - src_x *= m_resolution_scale; - src_y *= m_resolution_scale; - dst_x *= m_resolution_scale; - dst_y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - - const CD3D11_BOX src_box(src_x, src_y, 0, src_x + width, src_y + height, 1); - m_context->CopySubresourceRegion(m_vram_texture, 0, dst_x, dst_y, 0, m_vram_read_texture, 0, &src_box); -} - -void GPU_HW_D3D11::UpdateVRAMReadTexture() -{ - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; - const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1); - - if (m_vram_texture.IsMultisampled()) - { - m_context->ResolveSubresource(m_vram_read_texture.GetD3DTexture(), 0, m_vram_texture.GetD3DTexture(), 0, - m_vram_texture.GetDXGIFormat()); - } - else - { - m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0, - &src_box); - } - - GPU_HW::UpdateVRAMReadTexture(); -} - -void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit() -{ - if (m_pgxp_depth_buffer) - return; - - SetViewportAndScissor(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - - m_context->OMSetRenderTargets(0, nullptr, m_vram_depth_view.Get()); - m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); - m_context->OMSetBlendState(m_blend_no_color_writes_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); - DrawUtilityShader(m_vram_update_depth_pixel_shader.Get(), nullptr, 0); - - m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray()); - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D11::ClearDepthBuffer() -{ - DebugAssert(m_pgxp_depth_buffer); - - m_context->ClearDepthStencilView(m_vram_depth_view.Get(), 
D3D11_CLEAR_DEPTH, 1.0f, 0); - m_last_depth_z = 1.0f; -} - -void GPU_HW_D3D11::DownsampleFramebuffer(D3D11::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - DownsampleFramebufferAdaptive(source, left, top, width, height); - else - DownsampleFramebufferBoxFilter(source, left, top, width, height); -} - -void GPU_HW_D3D11::DownsampleFramebufferAdaptive(D3D11::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - CD3D11_BOX src_box(left, top, 0, left + width, top + height, 1); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - m_context->CopySubresourceRegion(m_downsample_texture, 0, left, top, 0, source, 0, &src_box); - m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); - m_context->VSSetShader(m_uv_quad_vertex_shader.Get(), nullptr, 0); - - // create mip chain - const u32 levels = m_downsample_texture.GetLevels(); - for (u32 level = 1; level < levels; level++) - { - static constexpr float clear_color[4] = {}; - - SetViewportAndScissor(left >> level, top >> level, width >> level, height >> level); - m_context->ClearRenderTargetView(m_downsample_mip_views[level].second.Get(), clear_color); - m_context->OMSetRenderTargets(1, m_downsample_mip_views[level].second.GetAddressOf(), nullptr); - m_context->PSSetShaderResources(0, 1, m_downsample_mip_views[level - 1].first.GetAddressOf()); - - const SmoothingUBOData ubo = GetSmoothingUBO(level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - m_context->PSSetShader( - (level == 1) ? 
m_downsample_first_pass_pixel_shader.Get() : m_downsample_mid_pass_pixel_shader.Get(), nullptr, 0); - UploadUniformBuffer(&ubo, sizeof(ubo)); - m_context->Draw(3, 0); - } - - // blur pass at lowest level - { - const u32 last_level = levels - 1; - static constexpr float clear_color[4] = {}; - - SetViewportAndScissor(left >> last_level, top >> last_level, width >> last_level, height >> last_level); - m_context->ClearRenderTargetView(m_downsample_weight_texture.GetD3DRTV(), clear_color); - m_context->OMSetRenderTargets(1, m_downsample_weight_texture.GetD3DRTVArray(), nullptr); - m_context->PSSetShaderResources(0, 1, m_downsample_mip_views.back().first.GetAddressOf()); - m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); - - const SmoothingUBOData ubo = GetSmoothingUBO(last_level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); - UploadUniformBuffer(&ubo, sizeof(ubo)); - m_context->Draw(3, 0); - } - - // composite downsampled and upsampled images together - { - SetViewportAndScissor(left, top, width, height); - m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); - - ID3D11ShaderResourceView* const srvs[2] = {m_downsample_texture.GetD3DSRV(), - m_downsample_weight_texture.GetD3DSRV()}; - ID3D11SamplerState* const samplers[2] = {m_trilinear_sampler_state.Get(), m_linear_sampler_state.Get()}; - m_context->PSSetShaderResources(0, countof(srvs), srvs); - m_context->PSSetSamplers(0, countof(samplers), samplers); - m_context->PSSetShader(m_downsample_composite_pixel_shader.Get(), nullptr, 0); - m_context->Draw(3, 0); - } - - ID3D11ShaderResourceView* const null_srvs[2] = {}; - m_context->PSSetShaderResources(0, countof(null_srvs), null_srvs); - m_batch_ubo_dirty = true; - - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(&m_display_texture, left, top, width, height); -} - -void 
GPU_HW_D3D11::DownsampleFramebufferBoxFilter(D3D11::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - const u32 ds_left = left / m_resolution_scale; - const u32 ds_top = top / m_resolution_scale; - const u32 ds_width = width / m_resolution_scale; - const u32 ds_height = height / m_resolution_scale; - static constexpr float clear_color[4] = {}; - - m_context->ClearRenderTargetView(m_downsample_texture.GetD3DRTV(), clear_color); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->OMSetRenderTargets(1, m_downsample_texture.GetD3DRTVArray(), nullptr); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - m_context->VSSetShader(m_screen_quad_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(m_downsample_first_pass_pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, source.GetD3DSRVArray()); - SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height); - m_context->Draw(3, 0); - - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(&m_downsample_texture, ds_left, ds_top, ds_width, ds_height); -} - -std::unique_ptr GPU::CreateHardwareD3D11Renderer() -{ - if (!Host::AcquireHostDisplay(RenderAPI::D3D11)) - { - Log_ErrorPrintf("Host render API is incompatible"); - return nullptr; - } - - ID3D11Device* device = static_cast(g_host_display->GetDevice()); - ID3D11DeviceContext* context = static_cast(g_host_display->GetContext()); - if (!device || !context) - return nullptr; - - std::unique_ptr gpu(std::make_unique(device, context)); - if (!gpu->Initialize()) - return nullptr; - - return gpu; -} diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h deleted file mode 100644 index 0a6f7533d..000000000 --- a/src/core/gpu_hw_d3d11.h +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/d3d11/shader_cache.h" -#include 
"common/d3d11/stream_buffer.h" -#include "common/d3d11/texture.h" -#include "gpu_hw.h" -#include "texture_replacements.h" -#include -#include -#include -#include -#include - -class GPU_HW_D3D11 final : public GPU_HW -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - GPU_HW_D3D11(ID3D11Device* device, ID3D11DeviceContext* context); - ~GPU_HW_D3D11() override; - - GPURenderer GetRendererType() const override; - - bool Initialize() override; - void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; - - void ResetGraphicsAPIState() override; - void RestoreGraphicsAPIState() override; - void UpdateSettings() override; - -protected: - void ClearDisplay() override; - void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateVRAMReadTexture() override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; - -private: - enum : u32 - { - // Currently we don't stream uniforms, instead just re-map the buffer every time and let the driver take care of it. 
- MAX_UNIFORM_BUFFER_SIZE = 64 - }; - - void SetCapabilities(); - bool CreateFramebuffer(); - void ClearFramebuffer(); - void DestroyFramebuffer(); - - bool CreateVertexBuffer(); - bool CreateUniformBuffer(); - bool CreateTextureBuffer(); - bool CreateStateObjects(); - void DestroyStateObjects(); - - bool CompileShaders(); - void DestroyShaders(); - void SetViewport(u32 x, u32 y, u32 width, u32 height); - void SetScissor(u32 x, u32 y, u32 width, u32 height); - void SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height); - - void DrawUtilityShader(ID3D11PixelShader* shader, const void* uniforms, u32 uniforms_size); - - bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); - - void DownsampleFramebuffer(D3D11::Texture& source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferAdaptive(D3D11::Texture& source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferBoxFilter(D3D11::Texture& source, u32 left, u32 top, u32 width, u32 height); - - ComPtr m_device; - ComPtr m_context; - - // downsample texture - used for readbacks at >1xIR. 
- D3D11::Texture m_vram_texture; - D3D11::Texture m_vram_depth_texture; - ComPtr m_vram_depth_view; - D3D11::Texture m_vram_read_texture; - D3D11::Texture m_vram_encoding_texture; - D3D11::Texture m_display_texture; - - D3D11::StreamBuffer m_vertex_stream_buffer; - - D3D11::StreamBuffer m_uniform_stream_buffer; - - D3D11::StreamBuffer m_texture_stream_buffer; - - ComPtr m_texture_stream_buffer_srv_r16ui; - - ComPtr m_cull_none_rasterizer_state; - ComPtr m_cull_none_rasterizer_state_no_msaa; - - ComPtr m_depth_disabled_state; - ComPtr m_depth_test_always_state; - ComPtr m_depth_test_less_state; - ComPtr m_depth_test_greater_state; - - ComPtr m_blend_disabled_state; - ComPtr m_blend_no_color_writes_state; - - ComPtr m_point_sampler_state; - ComPtr m_linear_sampler_state; - ComPtr m_trilinear_sampler_state; - - std::array, 5> m_batch_blend_states; // [transparency_mode] - ComPtr m_batch_input_layout; - std::array, 2> m_batch_vertex_shaders; // [textured] - std::array, 2>, 2>, 9>, 4> - m_batch_pixel_shaders; // [render_mode][texture_mode][dithering][interlacing] - - ComPtr m_screen_quad_vertex_shader; - ComPtr m_uv_quad_vertex_shader; - ComPtr m_copy_pixel_shader; - std::array, 2>, 2> m_vram_fill_pixel_shaders; // [wrapped][interlaced] - ComPtr m_vram_read_pixel_shader; - ComPtr m_vram_write_pixel_shader; - ComPtr m_vram_copy_pixel_shader; - ComPtr m_vram_update_depth_pixel_shader; - std::array, 3>, 2> m_display_pixel_shaders; // [depth_24][interlaced] - - D3D11::Texture m_vram_replacement_texture; - - // downsampling - ComPtr m_downsample_first_pass_pixel_shader; - ComPtr m_downsample_mid_pass_pixel_shader; - ComPtr m_downsample_blur_pass_pixel_shader; - ComPtr m_downsample_composite_pixel_shader; - D3D11::Texture m_downsample_texture; - D3D11::Texture m_downsample_weight_texture; - std::vector, ComPtr>> m_downsample_mip_views; -}; diff --git a/src/core/gpu_hw_d3d12.cpp b/src/core/gpu_hw_d3d12.cpp deleted file mode 100644 index 21f018411..000000000 --- 
a/src/core/gpu_hw_d3d12.cpp +++ /dev/null @@ -1,1197 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "gpu_hw_d3d12.h" -#include "common/align.h" -#include "common/assert.h" -#include "common/d3d11/shader_compiler.h" -#include "common/d3d12/context.h" -#include "common/d3d12/descriptor_heap_manager.h" -#include "common/d3d12/shader_cache.h" -#include "common/d3d12/util.h" -#include "common/log.h" -#include "common/scoped_guard.h" -#include "common/timer.h" -#include "gpu_hw_shadergen.h" -#include "util/host_display.h" -#include "system.h" -Log_SetChannel(GPU_HW_D3D12); - -GPU_HW_D3D12::GPU_HW_D3D12() = default; - -GPU_HW_D3D12::~GPU_HW_D3D12() -{ - g_host_display->ClearDisplayTexture(); - - DestroyResources(); -} - -GPURenderer GPU_HW_D3D12::GetRendererType() const -{ - return GPURenderer::HardwareD3D12; -} - -bool GPU_HW_D3D12::Initialize() -{ - SetCapabilities(); - - if (!GPU_HW::Initialize()) - return false; - - if (!CreateRootSignatures()) - { - Log_ErrorPrintf("Failed to create root signatures"); - return false; - } - - if (!CreateSamplers()) - { - Log_ErrorPrintf("Failed to create samplers"); - return false; - } - - if (!CreateVertexBuffer()) - { - Log_ErrorPrintf("Failed to create vertex buffer"); - return false; - } - - if (!CreateUniformBuffer()) - { - Log_ErrorPrintf("Failed to create uniform buffer"); - return false; - } - - if (!CreateTextureBuffer()) - { - Log_ErrorPrintf("Failed to create texture buffer"); - return false; - } - - if (!CreateFramebuffer()) - { - Log_ErrorPrintf("Failed to create framebuffer"); - return false; - } - - if (!CompilePipelines()) - { - Log_ErrorPrintf("Failed to compile pipelines"); - return false; - } - - RestoreGraphicsAPIState(); - UpdateDepthBufferFromMaskBit(); - return true; -} - -void GPU_HW_D3D12::Reset(bool clear_vram) -{ - GPU_HW::Reset(clear_vram); - - if (clear_vram) - ClearFramebuffer(); -} - -void 
GPU_HW_D3D12::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); -} - -void GPU_HW_D3D12::RestoreGraphicsAPIState() -{ - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->OMSetRenderTargets(1, &m_vram_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, - &m_vram_depth_texture.GetRTVOrDSVDescriptor().cpu_handle); - - const D3D12_VERTEX_BUFFER_VIEW vbv{m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), - sizeof(BatchVertex)}; - cmdlist->IASetVertexBuffers(0, 1, &vbv); - cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - - cmdlist->SetGraphicsRootSignature(m_batch_root_signature.Get()); - cmdlist->SetGraphicsRootConstantBufferView(0, - m_uniform_stream_buffer.GetGPUPointer() + m_current_uniform_buffer_offset); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_read_texture.GetSRVDescriptor().gpu_handle); - cmdlist->SetGraphicsRootDescriptorTable(2, m_point_sampler.gpu_handle); - - D3D12::SetViewport(cmdlist, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - - SetScissorFromDrawingArea(); -} - -void GPU_HW_D3D12::UpdateSettings() -{ - GPU_HW::UpdateSettings(); - - bool framebuffer_changed, shaders_changed; - UpdateHWSettings(&framebuffer_changed, &shaders_changed); - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); - } - - // Everything should be finished executing before recreating resources. 
- g_host_display->ClearDisplayTexture(); - g_d3d12_context->ExecuteCommandList(true); - - if (framebuffer_changed) - CreateFramebuffer(); - - if (shaders_changed) - { - // clear it since we draw a loading screen and it's not in the correct state - DestroyPipelines(); - CompilePipelines(); - } - - // this has to be done here, because otherwise we're using destroyed pipelines in the same cmdbuffer - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); - UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); - ResetGraphicsAPIState(); - } -} - -void GPU_HW_D3D12::MapBatchVertexPointer(u32 required_vertices) -{ - DebugAssert(!m_batch_start_vertex_ptr); - - const u32 required_space = required_vertices * sizeof(BatchVertex); - if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes in vertex stream buffer", required_space); - g_d3d12_context->ExecuteCommandList(false); - RestoreGraphicsAPIState(); - if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) - Panic("Failed to reserve vertex stream buffer memory"); - } - - m_batch_start_vertex_ptr = static_cast(m_vertex_stream_buffer.GetCurrentHostPointer()); - m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + (m_vertex_stream_buffer.GetCurrentSpace() / sizeof(BatchVertex)); - m_batch_base_vertex = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(BatchVertex); -} - -void GPU_HW_D3D12::UnmapBatchVertexPointer(u32 used_vertices) -{ - DebugAssert(m_batch_start_vertex_ptr); - if (used_vertices > 0) - m_vertex_stream_buffer.CommitMemory(used_vertices * sizeof(BatchVertex)); - - m_batch_start_vertex_ptr = nullptr; - m_batch_end_vertex_ptr = nullptr; - m_batch_current_vertex_ptr = nullptr; -} - -void GPU_HW_D3D12::UploadUniformBuffer(const void* data, u32 data_size) -{ - if 
(!m_uniform_stream_buffer.ReserveMemory(data_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes in uniform stream buffer", data_size); - g_d3d12_context->ExecuteCommandList(false); - RestoreGraphicsAPIState(); - if (!m_uniform_stream_buffer.ReserveMemory(data_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) - Panic("Failed to reserve uniform stream buffer memory"); - } - - m_current_uniform_buffer_offset = m_uniform_stream_buffer.GetCurrentOffset(); - std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), data, data_size); - m_uniform_stream_buffer.CommitMemory(data_size); - - g_d3d12_context->GetCommandList()->SetGraphicsRootConstantBufferView(0, m_uniform_stream_buffer.GetGPUPointer() + - m_current_uniform_buffer_offset); -} - -void GPU_HW_D3D12::SetCapabilities() -{ - // TODO: Query from device - const u32 max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; - const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; - Log_InfoPrintf("Max texture size: %ux%u", max_texture_size, max_texture_size); - m_max_resolution_scale = max_texture_scale; - - m_max_multisamples = 1; - for (u32 multisamples = 2; multisamples < D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) - { - D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS fd = {DXGI_FORMAT_R8G8B8A8_UNORM, static_cast(multisamples)}; - - if (SUCCEEDED(g_d3d12_context->GetDevice()->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &fd, - sizeof(fd))) && - fd.NumQualityLevels > 0) - { - m_max_multisamples = multisamples; - } - } - - m_supports_dual_source_blend = true; - m_supports_per_sample_shading = true; - m_supports_disable_color_perspective = true; - Log_InfoPrintf("Dual-source blend: %s", m_supports_dual_source_blend ? "supported" : "not supported"); - Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? 
"supported" : "not supported"); - Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); -} - -void GPU_HW_D3D12::DestroyResources() -{ - // Everything should be finished executing before recreating resources. - if (g_d3d12_context) - g_d3d12_context->ExecuteCommandList(true); - - DestroyFramebuffer(); - DestroyPipelines(); - - g_d3d12_context->GetSamplerHeapManager().Free(&m_point_sampler); - g_d3d12_context->GetSamplerHeapManager().Free(&m_linear_sampler); - g_d3d12_context->GetDescriptorHeapManager().Free(&m_texture_stream_buffer_srv); - - m_vertex_stream_buffer.Destroy(false); - m_uniform_stream_buffer.Destroy(false); - m_texture_stream_buffer.Destroy(false); - - m_single_sampler_root_signature.Reset(); - m_batch_root_signature.Reset(); -} - -bool GPU_HW_D3D12::CreateRootSignatures() -{ - D3D12::RootSignatureBuilder rsbuilder; - rsbuilder.SetInputAssemblerFlag(); - rsbuilder.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - m_batch_root_signature = rsbuilder.Create(); - if (!m_batch_root_signature) - return false; - - rsbuilder.Add32BitConstants(0, MAX_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - m_single_sampler_root_signature = rsbuilder.Create(); - if (!m_single_sampler_root_signature) - return false; - - return true; -} - -bool GPU_HW_D3D12::CreateSamplers() -{ - D3D12_SAMPLER_DESC desc = {}; - D3D12::SetDefaultSampler(&desc); - desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - - if 
(!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_point_sampler)) - return false; - - g_d3d12_context->GetDevice()->CreateSampler(&desc, m_point_sampler.cpu_handle); - - desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - - if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_linear_sampler)) - return false; - - g_d3d12_context->GetDevice()->CreateSampler(&desc, m_linear_sampler.cpu_handle); - return true; -} - -bool GPU_HW_D3D12::CreateFramebuffer() -{ - DestroyFramebuffer(); - - // scale vram size to internal resolution - const u32 texture_width = VRAM_WIDTH * m_resolution_scale; - const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; - const DXGI_FORMAT texture_format = DXGI_FORMAT_R8G8B8A8_UNORM; - const DXGI_FORMAT depth_format = DXGI_FORMAT_D16_UNORM; - - if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, m_multisamples, texture_format, texture_format, - texture_format, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || - !m_vram_depth_texture.Create( - texture_width, texture_height, 1, 1, m_multisamples, depth_format, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, - depth_format, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) || - !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, 1, texture_format, texture_format, - DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE) || - !m_display_texture.Create(texture_width, texture_height, 1, 1, 1, texture_format, texture_format, texture_format, - DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || - !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, texture_format, texture_format, texture_format, - DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || - !m_vram_readback_staging_texture.Create(VRAM_WIDTH / 2, VRAM_HEIGHT, texture_format, false)) - { - return false; - } - - D3D12::SetObjectName(m_vram_texture, "VRAM Texture"); - D3D12::SetObjectName(m_vram_depth_texture, 
"VRAM Depth Texture"); - D3D12::SetObjectName(m_vram_read_texture, "VRAM Read/Sample Texture"); - D3D12::SetObjectName(m_display_texture, "VRAM Display Texture"); - D3D12::SetObjectName(m_vram_read_texture, "VRAM Readback Texture"); - - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - m_vram_depth_texture.TransitionToState(D3D12_RESOURCE_STATE_DEPTH_WRITE); - m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - ClearDisplay(); - SetFullVRAMDirtyRectangle(); - return true; -} - -void GPU_HW_D3D12::ClearFramebuffer() -{ - static constexpr float clear_color[4] = {}; - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - cmdlist->ClearRenderTargetView(m_vram_texture.GetRTVOrDSVDescriptor(), clear_color, 0, nullptr); - cmdlist->ClearDepthStencilView(m_vram_depth_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, - m_pgxp_depth_buffer ? 1.0f : 0.0f, 0, 0, nullptr); - SetFullVRAMDirtyRectangle(); -} - -void GPU_HW_D3D12::DestroyFramebuffer() -{ - m_vram_read_texture.Destroy(false); - m_vram_depth_texture.Destroy(false); - m_vram_texture.Destroy(false); - m_vram_readback_texture.Destroy(false); - m_display_texture.Destroy(false); - m_vram_readback_staging_texture.Destroy(false); -} - -bool GPU_HW_D3D12::CreateVertexBuffer() -{ - if (!m_vertex_stream_buffer.Create(VERTEX_BUFFER_SIZE)) - return false; - - D3D12::SetObjectName(m_vertex_stream_buffer.GetBuffer(), "Vertex Stream Buffer"); - return true; -} - -bool GPU_HW_D3D12::CreateUniformBuffer() -{ - if (!m_uniform_stream_buffer.Create(UNIFORM_BUFFER_SIZE)) - return false; - - D3D12::SetObjectName(m_vertex_stream_buffer.GetBuffer(), "Uniform Stream Buffer"); - return true; -} - -bool GPU_HW_D3D12::CreateTextureBuffer() -{ - if (!m_texture_stream_buffer.Create(VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) - return false; - - if (!g_d3d12_context->GetDescriptorHeapManager().Allocate(&m_texture_stream_buffer_srv)) - return false; - - 
D3D12_SHADER_RESOURCE_VIEW_DESC desc = {}; - desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - desc.Format = DXGI_FORMAT_R16_UINT; - desc.Buffer.NumElements = VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16); - desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - g_d3d12_context->GetDevice()->CreateShaderResourceView(m_texture_stream_buffer.GetBuffer(), &desc, - m_texture_stream_buffer_srv); - - D3D12::SetObjectName(m_texture_stream_buffer.GetBuffer(), "Texture Stream Buffer"); - return true; -} - -bool GPU_HW_D3D12::CompilePipelines() -{ - D3D12::ShaderCache shader_cache; - shader_cache.Open(EmuFolders::Cache, g_d3d12_context->GetFeatureLevel(), g_settings.gpu_use_debug_device); - - GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); - - ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + - (2 * 2) + 2 + 2 + 1 + 1 + (2 * 3) + 1); - - // vertex shaders - [textured] - // fragment shaders - [render_mode][texture_mode][dithering][interlacing] - DimensionalArray, 2> batch_vertex_shaders{}; - DimensionalArray, 2, 2, 9, 4> batch_fragment_shaders{}; - - for (u8 textured = 0; textured < 2; textured++) - { - const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); - batch_vertex_shaders[textured] = shader_cache.GetVertexShader(vs); - if (!batch_vertex_shaders[textured]) - return false; - - progress.Increment(); - } - - for (u8 render_mode = 0; render_mode < 4; render_mode++) - { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - const std::string fs = shadergen.GenerateBatchFragmentShader( - 
static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); - - batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = shader_cache.GetPixelShader(fs); - if (!batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]) - return false; - - progress.Increment(); - } - } - } - } - - D3D12::GraphicsPipelineBuilder gpbuilder; - - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - for (u8 depth_test = 0; depth_test < 2; depth_test++) - { - for (u8 render_mode = 0; render_mode < 4; render_mode++) - { - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) - { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); - - gpbuilder.SetRootSignature(m_batch_root_signature.Get()); - gpbuilder.SetRenderTarget(0, m_vram_texture.GetDXGIFormat()); - gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetDXGIFormat()); - - gpbuilder.AddVertexAttribute("ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x)); - gpbuilder.AddVertexAttribute("ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color)); - if (textured) - { - gpbuilder.AddVertexAttribute("ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u)); - gpbuilder.AddVertexAttribute("ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage)); - if (m_using_uv_limits) - gpbuilder.AddVertexAttribute("ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, - offsetof(BatchVertex, uv_limits)); - } - - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)].Get()); - 
gpbuilder.SetPixelShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].Get()); - - gpbuilder.SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); - gpbuilder.SetDepthState(true, true, - (depth_test != 0) ? (m_pgxp_depth_buffer ? D3D12_COMPARISON_FUNC_LESS_EQUAL : - D3D12_COMPARISON_FUNC_GREATER_EQUAL) : - D3D12_COMPARISON_FUNC_ALWAYS); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetMultisamples(m_multisamples); - - if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - m_texture_filtering != GPUTextureFilter::Nearest) - { - gpbuilder.SetBlendState( - 0, true, D3D12_BLEND_ONE, - m_supports_dual_source_blend ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC_ALPHA, - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? 
- D3D12_BLEND_OP_REV_SUBTRACT : - D3D12_BLEND_OP_ADD, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD); - } - - m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] = - gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache); - if (!m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]) - return false; - - D3D12::SetObjectNameFormatted( - m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - .Get(), - "Batch Pipeline %u,%u,%u,%u,%u,%u", depth_test, render_mode, texture_mode, transparency_mode, dithering, - interlacing); - - progress.Increment(); - } - } - } - } - } - } - - ComPtr fullscreen_quad_vertex_shader = - shader_cache.GetVertexShader(shadergen.GenerateScreenQuadVertexShader()); - if (!fullscreen_quad_vertex_shader) - return false; - - progress.Increment(); - - // common state - gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); - gpbuilder.SetRenderTarget(0, m_vram_texture.GetDXGIFormat()); - gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetDXGIFormat()); - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); - gpbuilder.SetMultisamples(m_multisamples); - gpbuilder.SetRenderTarget(0, m_vram_texture.GetDXGIFormat()); - gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetDXGIFormat()); - - // VRAM fill - for (u8 wrapped = 0; wrapped < 2; wrapped++) - { - for (u8 interlaced = 0; interlaced < 2; interlaced++) - { - ComPtr fs = shader_cache.GetPixelShader( - shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced))); - if (!fs) - return false; - - gpbuilder.SetPixelShader(fs.Get()); - gpbuilder.SetDepthState(true, true, 
D3D12_COMPARISON_FUNC_ALWAYS); - - m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_vram_fill_pipelines[wrapped][interlaced]) - return false; - - D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[wrapped][interlaced].Get(), - "VRAM Fill Pipeline Wrapped=%u,Interlacing=%u", wrapped, interlaced); - - progress.Increment(); - } - } - - // VRAM copy - { - ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMCopyFragmentShader()); - if (!fs) - return false; - - gpbuilder.SetPixelShader(fs.Get()); - for (u8 depth_test = 0; depth_test < 2; depth_test++) - { - gpbuilder.SetDepthState((depth_test != 0), true, - (depth_test != 0) ? D3D12_COMPARISON_FUNC_GREATER_EQUAL : D3D12_COMPARISON_FUNC_ALWAYS); - - m_vram_copy_pipelines[depth_test] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_vram_copy_pipelines[depth_test]) - return false; - - D3D12::SetObjectNameFormatted(m_vram_copy_pipelines[depth_test].Get(), "VRAM Copy Pipeline Depth=%u", depth_test); - - progress.Increment(); - } - } - - // VRAM write - { - ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMWriteFragmentShader(false)); - if (!fs) - return false; - - gpbuilder.SetPixelShader(fs.Get()); - for (u8 depth_test = 0; depth_test < 2; depth_test++) - { - gpbuilder.SetDepthState(true, true, - (depth_test != 0) ? 
D3D12_COMPARISON_FUNC_GREATER_EQUAL : D3D12_COMPARISON_FUNC_ALWAYS); - m_vram_write_pipelines[depth_test] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_vram_write_pipelines[depth_test]) - return false; - - D3D12::SetObjectNameFormatted(m_vram_write_pipelines[depth_test].Get(), "VRAM Write Pipeline Depth=%u", - depth_test); - - progress.Increment(); - } - } - - // VRAM update depth - { - ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMUpdateDepthFragmentShader()); - if (!fs) - return false; - - gpbuilder.SetRootSignature(m_batch_root_signature.Get()); - gpbuilder.SetPixelShader(fs.Get()); - gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS); - gpbuilder.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, - D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, 0); - gpbuilder.ClearRenderTargets(); - - m_vram_update_depth_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_vram_update_depth_pipeline) - return false; - - D3D12::SetObjectName(m_vram_update_depth_pipeline.Get(), "VRAM Update Depth Pipeline"); - - progress.Increment(); - } - - gpbuilder.Clear(); - - // VRAM read - { - ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateVRAMReadFragmentShader()); - if (!fs) - return false; - - gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); - gpbuilder.SetPixelShader(fs.Get()); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetRenderTarget(0, m_vram_readback_texture.GetDXGIFormat()); - gpbuilder.ClearDepthStencilFormat(); - - m_vram_readback_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_vram_readback_pipeline) - return false; - - 
D3D12::SetObjectName(m_vram_update_depth_pipeline.Get(), "VRAM Readback Pipeline"); - - progress.Increment(); - } - - gpbuilder.Clear(); - - // Display - { - gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetRenderTarget(0, m_display_texture.GetDXGIFormat()); - - for (u8 depth_24 = 0; depth_24 < 2; depth_24++) - { - for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) - { - ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode), m_chroma_smoothing)); - if (!fs) - return false; - - gpbuilder.SetPixelShader(fs.Get()); - - m_display_pipelines[depth_24][interlace_mode] = - gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_display_pipelines[depth_24][interlace_mode]) - return false; - - D3D12::SetObjectNameFormatted(m_display_pipelines[depth_24][interlace_mode].Get(), - "Display Pipeline Depth=%u Interlace=%u", depth_24, interlace_mode); - - progress.Increment(); - } - } - } - - // copy/blit - { - gpbuilder.Clear(); - gpbuilder.SetRootSignature(m_single_sampler_root_signature.Get()); - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader.Get()); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); - - ComPtr fs = shader_cache.GetPixelShader(shadergen.GenerateCopyFragmentShader()); - if (!fs) - return false; - - gpbuilder.SetPixelShader(fs.Get()); - - m_copy_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache); - if (!m_copy_pipeline) - return 
false; - - progress.Increment(); - } - -#undef UPDATE_PROGRESS - - return true; -} - -void GPU_HW_D3D12::DestroyPipelines() -{ - m_batch_pipelines = {}; - m_vram_fill_pipelines = {}; - m_vram_write_pipelines = {}; - m_vram_copy_pipelines = {}; - m_vram_readback_pipeline.Reset(); - m_vram_update_depth_pipeline.Reset(); - - m_display_pipelines = {}; -} - -bool GPU_HW_D3D12::CreateTextureReplacementStreamBuffer() -{ - if (m_texture_replacment_stream_buffer.IsValid()) - return true; - - if (!m_texture_replacment_stream_buffer.Create(TEXTURE_REPLACEMENT_BUFFER_SIZE)) - { - Log_ErrorPrint("Failed to allocate texture replacement streaming buffer"); - return false; - } - - return true; -} - -bool GPU_HW_D3D12::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height) -{ - if (!CreateTextureReplacementStreamBuffer()) - return false; - - if (m_vram_write_replacement_texture.GetWidth() < tex->GetWidth() || - m_vram_write_replacement_texture.GetHeight() < tex->GetHeight()) - { - if (!m_vram_write_replacement_texture.Create(tex->GetWidth(), tex->GetHeight(), 1, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_FLAG_NONE)) - { - Log_ErrorPrint("Failed to create VRAM write replacement texture"); - return false; - } - } - - const u32 copy_pitch = Common::AlignUpPow2(tex->GetWidth() * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 required_size = copy_pitch * tex->GetHeight(); - if (!m_texture_replacment_stream_buffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) - { - Log_PerfPrint("Executing command buffer while waiting for texture replacement buffer space"); - g_d3d12_context->ExecuteCommandList(false); - RestoreGraphicsAPIState(); - if (!m_texture_replacment_stream_buffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) - { - Log_ErrorPrintf("Failed to allocate %u bytes from texture replacement 
streaming buffer", required_size); - return false; - } - } - - // buffer -> texture - const u32 sb_offset = m_texture_replacment_stream_buffer.GetCurrentOffset(); - D3D12::Texture::CopyToUploadBuffer(tex->GetPixels(), tex->GetPitch(), tex->GetHeight(), - m_texture_replacment_stream_buffer.GetCurrentHostPointer(), copy_pitch); - m_texture_replacment_stream_buffer.CommitMemory(required_size); - m_vram_write_replacement_texture.CopyFromBuffer(0, 0, tex->GetWidth(), tex->GetHeight(), copy_pitch, - m_texture_replacment_stream_buffer.GetBuffer(), sb_offset); - m_vram_write_replacement_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - // texture -> vram - const float uniforms[] = { - 0.0f, 0.0f, static_cast(tex->GetWidth()) / static_cast(m_vram_write_replacement_texture.GetWidth()), - static_cast(tex->GetHeight()) / static_cast(m_vram_write_replacement_texture.GetHeight())}; - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_write_replacement_texture.GetSRVDescriptor()); - cmdlist->SetGraphicsRootDescriptorTable(2, m_linear_sampler.gpu_handle); - cmdlist->SetPipelineState(m_copy_pipeline.Get()); - D3D12::SetViewportAndScissor(cmdlist, dst_x, dst_y, width, height); - cmdlist->DrawInstanced(3, 1, 0, 0); - RestoreGraphicsAPIState(); - return true; -} - -void GPU_HW_D3D12::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) -{ - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - // [primitive][depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - ID3D12PipelineState* pipeline = - m_batch_pipelines[BoolToUInt8(m_batch.check_mask_before_draw || m_batch.use_depth_buffer)][static_cast( - 
render_mode)][static_cast(m_batch.texture_mode)][static_cast(m_batch.transparency_mode)] - [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] - .Get(); - - cmdlist->SetPipelineState(pipeline); - cmdlist->DrawInstanced(num_vertices, 1, base_vertex, 0); -} - -void GPU_HW_D3D12::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - - D3D12::SetScissor(g_d3d12_context->GetCommandList(), left, top, right - left, bottom - top); -} - -void GPU_HW_D3D12::ClearDisplay() -{ - GPU_HW::ClearDisplay(); - - g_host_display->ClearDisplayTexture(); - - static constexpr float clear_color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; - m_display_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - g_d3d12_context->GetCommandList()->ClearRenderTargetView(m_display_texture.GetRTVOrDSVDescriptor(), clear_color, 0, - nullptr); -} - -void GPU_HW_D3D12::UpdateDisplay() -{ - GPU_HW::UpdateDisplay(); - - if (g_settings.debugging.show_vram) - { - if (IsUsingMultisampling()) - { - UpdateVRAMReadTexture(); - g_host_display->SetDisplayTexture(&m_vram_read_texture, 0, 0, m_vram_read_texture.GetWidth(), - m_vram_read_texture.GetHeight()); - } - else - { - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - g_host_display->SetDisplayTexture(&m_vram_texture, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - } - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); - } - else - { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - GetDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 
1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) - { - g_host_display->ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) - { - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - g_host_display->SetDisplayTexture(&m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); - } - else - { - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? 
GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - m_display_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - cmdlist->OMSetRenderTargets(1, &m_display_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); - cmdlist->SetPipelineState( - m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Get()); - D3D12::SetViewportAndScissor(cmdlist, 0, 0, scaled_display_width, scaled_display_height); - cmdlist->DrawInstanced(3, 1, 0, 0); - - m_display_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - g_host_display->SetDisplayTexture(&m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - - RestoreGraphicsAPIState(); - } - } -} - -void GPU_HW_D3D12::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - { - ReadSoftwareRendererVRAM(x, y, width, height); - return; - } - - // Get bounds with wrap-around handled. 
- const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - m_vram_readback_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - cmdlist->OMSetRenderTargets(1, &m_vram_readback_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); - cmdlist->SetPipelineState(m_vram_readback_pipeline.Get()); - D3D12::SetViewportAndScissor(cmdlist, 0, 0, encoded_width, encoded_height); - cmdlist->DrawInstanced(3, 1, 0, 0); - - m_vram_readback_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - // Stage the readback. - m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, encoded_width, - encoded_height); - - // And copy it into our shadow buffer (will execute command buffer and stall). 
- m_vram_readback_staging_texture.ReadPixels(0, 0, encoded_width, encoded_height, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], - VRAM_WIDTH * sizeof(u16)); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D12::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - if (IsUsingSoftwareRendererForReadbacks()) - FillSoftwareRendererVRAM(x, y, width, height, color); - - // TODO: Use fast clear - GPU_HW::FillVRAM(x, y, width, height, color); - - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - const bool wrapped = IsVRAMFillOversized(x, y, width, height); - const bool interlaced = IsInterlacedRenderingEnabled(); - if (!wrapped && !interlaced) - { - const D3D12_RECT rc = {static_cast(x * m_resolution_scale), static_cast(y * m_resolution_scale), - static_cast((x + width) * m_resolution_scale), - static_cast((y + height) * m_resolution_scale)}; - cmdlist->ClearRenderTargetView(m_vram_texture.GetRTVOrDSVDescriptor(), uniforms.u_fill_color, 1, &rc); - cmdlist->ClearDepthStencilView(m_vram_depth_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, - uniforms.u_fill_color[3], 0, 1, &rc); - return; - } - - cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor()); - cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] - [BoolToUInt8(IsInterlacedRenderingEnabled())] - .Get()); - - const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); - D3D12::SetViewportAndScissor(cmdlist, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, - bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); - - cmdlist->DrawInstanced(3, 1, 0, 0); - - 
RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D12::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - if (IsUsingSoftwareRendererForReadbacks()) - UpdateSoftwareRendererVRAM(x, y, width, height, data, set_mask, check_mask); - - const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, set_mask, check_mask); - - if (!check_mask) - { - const TextureReplacementTexture* rtex = g_texture_replacements.GetVRAMWriteReplacement(width, height, data); - if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, - width * m_resolution_scale, height * m_resolution_scale)) - { - return; - } - } - - const u32 data_size = width * height * sizeof(u16); - const u32 alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; // ??? - if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes in stream buffer", data_size); - g_d3d12_context->ExecuteCommandList(false); - RestoreGraphicsAPIState(); - if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) - { - Panic("Failed to allocate space in stream buffer for VRAM write"); - return; - } - } - - const u32 start_index = m_texture_stream_buffer.GetCurrentOffset() / sizeof(u16); - std::memcpy(m_texture_stream_buffer.GetCurrentHostPointer(), data, data_size); - m_texture_stream_buffer.CommitMemory(data_size); - - const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, set_mask, check_mask); - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, m_texture_stream_buffer_srv); - 
cmdlist->SetPipelineState(m_vram_write_pipelines[BoolToUInt8(check_mask)].Get()); - - // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; - D3D12::SetScissor(cmdlist, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), - scaled_bounds.GetHeight()); - - cmdlist->DrawInstanced(3, 1, 0, 0); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D12::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); - - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) - { - const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); - if (m_vram_dirty_rect.Intersects(src_bounds)) - UpdateVRAMReadTexture(); - IncludeVRAMDirtyRectangle(dst_bounds); - - const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height)); - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_read_texture.GetSRVDescriptor()); - cmdlist->SetPipelineState(m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)].Get()); - D3D12::SetViewportAndScissor(cmdlist, dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), - dst_bounds_scaled.GetHeight()); - cmdlist->DrawInstanced(3, 1, 0, 0); - - RestoreGraphicsAPIState(); - - if (m_GPUSTAT.check_mask_before_draw) - m_current_depth++; - - return; - } - - if 
(m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) - UpdateVRAMReadTexture(); - - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); - - src_x *= m_resolution_scale; - src_y *= m_resolution_scale; - dst_x *= m_resolution_scale; - dst_y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - - const D3D12_TEXTURE_COPY_LOCATION src = {m_vram_read_texture.GetResource(), - D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; - const D3D12_TEXTURE_COPY_LOCATION dst = {m_vram_texture.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; - const D3D12_BOX src_box = {src_x, src_y, 0u, src_x + width, src_y + height, 1u}; - - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); - m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); - - g_d3d12_context->GetCommandList()->CopyTextureRegion(&dst, dst_x, dst_y, 0, &src, &src_box); - - m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); -} - -void GPU_HW_D3D12::UpdateVRAMReadTexture() -{ - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; - - if (m_vram_texture.IsMultisampled()) - { - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RESOLVE_SOURCE); - m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_RESOLVE_DEST); - cmdlist->ResolveSubresource(m_vram_read_texture, 0, m_vram_texture, 0, m_vram_texture.GetDXGIFormat()); - } - else - { - const D3D12_TEXTURE_COPY_LOCATION src = {m_vram_texture.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; - const D3D12_TEXTURE_COPY_LOCATION dst = {m_vram_read_texture.GetResource(), - D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX}; - const D3D12_BOX src_box = {scaled_rect.left, scaled_rect.top, 0u, scaled_rect.right, scaled_rect.bottom, 1u}; - 
m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); - m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); - cmdlist->CopyTextureRegion(&dst, scaled_rect.left, scaled_rect.top, 0, &src, &src_box); - } - - m_vram_read_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - GPU_HW::UpdateVRAMReadTexture(); -} - -void GPU_HW_D3D12::UpdateDepthBufferFromMaskBit() -{ - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - cmdlist->OMSetRenderTargets(0, nullptr, FALSE, &m_vram_depth_texture.GetRTVOrDSVDescriptor().cpu_handle); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); - cmdlist->SetPipelineState(m_vram_update_depth_pipeline.Get()); - D3D12::SetViewportAndScissor(cmdlist, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - cmdlist->DrawInstanced(3, 1, 0, 0); - - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D12::ClearDepthBuffer() -{ - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - cmdlist->ClearDepthStencilView(m_vram_depth_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, - m_pgxp_depth_buffer ? 
1.0f : 0.0f, 0, 0, nullptr); -} - -std::unique_ptr GPU::CreateHardwareD3D12Renderer() -{ - if (!Host::AcquireHostDisplay(RenderAPI::D3D12)) - { - Log_ErrorPrintf("Host render API is incompatible"); - return nullptr; - } - - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize()) - return nullptr; - - return gpu; -} diff --git a/src/core/gpu_hw_d3d12.h b/src/core/gpu_hw_d3d12.h deleted file mode 100644 index 88cf60e8e..000000000 --- a/src/core/gpu_hw_d3d12.h +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/d3d12/staging_texture.h" -#include "common/d3d12/stream_buffer.h" -#include "common/d3d12/texture.h" -#include "common/dimensional_array.h" -#include "gpu_hw.h" -#include "texture_replacements.h" -#include -#include -#include - -class GPU_HW_D3D12 final : public GPU_HW -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - GPU_HW_D3D12(); - ~GPU_HW_D3D12() override; - - GPURenderer GetRendererType() const override; - - bool Initialize() override; - void Reset(bool clear_vram) override; - - void ResetGraphicsAPIState() override; - void RestoreGraphicsAPIState() override; - void UpdateSettings() override; - -protected: - void ClearDisplay() override; - void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateVRAMReadTexture() override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void 
UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; - -private: - enum : u32 - { - MAX_PUSH_CONSTANTS_SIZE = 64, - TEXTURE_REPLACEMENT_BUFFER_SIZE = 64 * 1024 * 1024, - }; - void SetCapabilities(); - void DestroyResources(); - - bool CreateRootSignatures(); - bool CreateSamplers(); - - bool CreateFramebuffer(); - void ClearFramebuffer(); - void DestroyFramebuffer(); - - bool CreateVertexBuffer(); - bool CreateUniformBuffer(); - bool CreateTextureBuffer(); - - bool CompilePipelines(); - void DestroyPipelines(); - - bool CreateTextureReplacementStreamBuffer(); - bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); - - ComPtr m_batch_root_signature; - ComPtr m_single_sampler_root_signature; - - D3D12::Texture m_vram_texture; - D3D12::Texture m_vram_depth_texture; - D3D12::Texture m_vram_read_texture; - D3D12::Texture m_vram_readback_texture; - D3D12::StagingTexture m_vram_readback_staging_texture; - D3D12::Texture m_display_texture; - - D3D12::DescriptorHandle m_point_sampler; - D3D12::DescriptorHandle m_linear_sampler; - - D3D12::StreamBuffer m_vertex_stream_buffer; - D3D12::StreamBuffer m_uniform_stream_buffer; - D3D12::StreamBuffer m_texture_stream_buffer; - D3D12::DescriptorHandle m_texture_stream_buffer_srv; - - u32 m_current_uniform_buffer_offset = 0; - - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - DimensionalArray, 2, 2, 5, 9, 4, 2> m_batch_pipelines; - - // [wrapped][interlaced] - DimensionalArray, 2, 2> m_vram_fill_pipelines; - - // [depth_test] - std::array, 2> m_vram_write_pipelines; - std::array, 2> m_vram_copy_pipelines; - - ComPtr m_vram_readback_pipeline; - ComPtr m_vram_update_depth_pipeline; - - // [depth_24][interlace_mode] - DimensionalArray, 3, 2> m_display_pipelines; - - ComPtr m_copy_pipeline; - D3D12::Texture 
m_vram_write_replacement_texture; - D3D12::StreamBuffer m_texture_replacment_stream_buffer; -}; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp deleted file mode 100644 index bebd36983..000000000 --- a/src/core/gpu_hw_opengl.cpp +++ /dev/null @@ -1,1335 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "gpu_hw_opengl.h" -#include "common/assert.h" -#include "common/log.h" -#include "common/timer.h" -#include "gpu_hw_shadergen.h" -#include "host.h" -#include "util/host_display.h" -#include "shader_cache_version.h" -#include "system.h" -#include "texture_replacements.h" -#include "util/state_wrapper.h" -Log_SetChannel(GPU_HW_OpenGL); - -GPU_HW_OpenGL::GPU_HW_OpenGL() : GPU_HW() {} - -GPU_HW_OpenGL::~GPU_HW_OpenGL() -{ - // Destroy objects which don't have destructors to clean them up - if (m_vram_fbo_id != 0) - glDeleteFramebuffers(1, &m_vram_fbo_id); - if (m_vao_id != 0) - glDeleteVertexArrays(1, &m_vao_id); - if (m_attributeless_vao_id != 0) - glDeleteVertexArrays(1, &m_attributeless_vao_id); - if (m_texture_buffer_r16ui_texture != 0) - glDeleteTextures(1, &m_texture_buffer_r16ui_texture); - - g_host_display->ClearDisplayTexture(); - - // One of our programs might've been bound. 
- GL::Program::ResetLastProgram(); - glUseProgram(0); -} - -GPURenderer GPU_HW_OpenGL::GetRendererType() const -{ - return GPURenderer::HardwareOpenGL; -} - -bool GPU_HW_OpenGL::Initialize() -{ - SetCapabilities(); - - if (!GPU_HW::Initialize()) - return false; - - if (!CreateFramebuffer()) - { - Log_ErrorPrintf("Failed to create framebuffer"); - return false; - } - - if (!CreateVertexBuffer()) - { - Log_ErrorPrintf("Failed to create vertex buffer"); - return false; - } - - if (!CreateUniformBuffer()) - { - Log_ErrorPrintf("Failed to create uniform buffer"); - return false; - } - - if (!CreateTextureBuffer()) - { - Log_ErrorPrintf("Failed to create texture buffer"); - return false; - } - - if (!CompilePrograms()) - { - Log_ErrorPrintf("Failed to compile programs"); - return false; - } - - RestoreGraphicsAPIState(); - return true; -} - -void GPU_HW_OpenGL::Reset(bool clear_vram) -{ - GPU_HW::Reset(clear_vram); - - if (clear_vram) - ClearFramebuffer(); -} - -bool GPU_HW_OpenGL::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) -{ - if (host_texture) - { - GPUTexture* tex = *host_texture; - if (sw.IsReading()) - { - if (tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != m_vram_texture.GetSamples()) - { - return false; - } - - CopyFramebufferForState(m_vram_texture.GetGLTarget(), static_cast(tex)->GetGLId(), 0, 0, 0, - m_vram_texture.GetGLId(), m_vram_fbo_id, 0, 0, m_vram_texture.GetWidth(), - m_vram_texture.GetHeight()); - } - else - { - if (!tex || tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != m_vram_texture.GetSamples()) - { - delete tex; - - tex = g_host_display - ->CreateTexture(m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 1, 1, - m_vram_texture.GetSamples(), GPUTexture::Format::RGBA8, nullptr, 0, false) - .release(); - *host_texture = tex; - if (!tex) - return false; - } - - 
CopyFramebufferForState(m_vram_texture.GetGLTarget(), m_vram_texture.GetGLId(), m_vram_fbo_id, 0, 0, - static_cast(tex)->GetGLId(), 0, 0, 0, m_vram_texture.GetWidth(), - m_vram_texture.GetHeight()); - } - } - - return GPU_HW::DoState(sw, host_texture, update_display); -} - -void GPU_HW_OpenGL::CopyFramebufferForState(GLenum target, GLuint src_texture, u32 src_fbo, u32 src_x, u32 src_y, - GLuint dst_texture, u32 dst_fbo, u32 dst_x, u32 dst_y, u32 width, - u32 height) -{ - if (target != GL_TEXTURE_2D && GLAD_GL_VERSION_4_3) - { - glCopyImageSubData(src_texture, target, 0, src_x, src_y, 0, dst_texture, target, 0, dst_x, dst_y, 0, width, height, - 1); - } - else if (target != GL_TEXTURE_2D && GLAD_GL_EXT_copy_image) - { - glCopyImageSubDataEXT(src_texture, target, 0, src_x, src_y, 0, dst_texture, target, 0, dst_x, dst_y, 0, width, - height, 1); - } - else if (target != GL_TEXTURE_2D && GLAD_GL_OES_copy_image) - { - glCopyImageSubDataOES(src_texture, target, 0, src_x, src_y, 0, dst_texture, target, 0, dst_x, dst_y, 0, width, - height, 1); - } - else - { - if (src_fbo == 0) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_state_copy_fbo_id); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, src_texture, 0); - } - else - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo); - } - - if (dst_fbo == 0) - { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_state_copy_fbo_id); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, dst_texture, 0); - } - else - { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo); - } - - glDisable(GL_SCISSOR_TEST); - glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, - GL_COLOR_BUFFER_BIT, GL_NEAREST); - glEnable(GL_SCISSOR_TEST); - - if (src_fbo == 0) - { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - } - else if (dst_fbo == 0) - { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 
GL_TEXTURE_2D, 0, 0); - } - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); - } -} - -void GPU_HW_OpenGL::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); - - glEnable(GL_CULL_FACE); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glBindVertexArray(0); - m_uniform_stream_buffer->Unbind(); -} - -void GPU_HW_OpenGL::RestoreGraphicsAPIState() -{ - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); - glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - - glDisable(GL_CULL_FACE); - glEnable(GL_DEPTH_TEST); - glEnable(GL_SCISSOR_TEST); - glDepthMask(GL_TRUE); - glBindVertexArray(m_vao_id); - m_uniform_stream_buffer->Bind(); - m_vram_read_texture.Bind(); - SetBlendMode(); - m_current_depth_test = 0; - SetDepthFunc(); - SetScissorFromDrawingArea(); - m_batch_ubo_dirty = true; -} - -void GPU_HW_OpenGL::UpdateSettings() -{ - GPU_HW::UpdateSettings(); - - bool framebuffer_changed, shaders_changed; - UpdateHWSettings(&framebuffer_changed, &shaders_changed); - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); - g_host_display->ClearDisplayTexture(); - CreateFramebuffer(); - } - if (shaders_changed) - CompilePrograms(); - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); - UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); - ResetGraphicsAPIState(); - } -} - -void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices) -{ - DebugAssert(!m_batch_start_vertex_ptr); - - const GL::StreamBuffer::MappingResult res = - m_vertex_stream_buffer->Map(sizeof(BatchVertex), required_vertices * sizeof(BatchVertex)); - - m_batch_start_vertex_ptr = static_cast(res.pointer); - m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned; - m_batch_base_vertex = res.index_aligned; -} - -void 
GPU_HW_OpenGL::UnmapBatchVertexPointer(u32 used_vertices) -{ - DebugAssert(m_batch_start_vertex_ptr); - - m_vertex_stream_buffer->Unmap(used_vertices * sizeof(BatchVertex)); - m_batch_start_vertex_ptr = nullptr; - m_batch_end_vertex_ptr = nullptr; - m_batch_current_vertex_ptr = nullptr; -} - -void GPU_HW_OpenGL::SetCapabilities() -{ - GLint max_texture_size = VRAM_WIDTH; - glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); - Log_InfoPrintf("Max texture size: %dx%d", max_texture_size, max_texture_size); - m_max_resolution_scale = static_cast(max_texture_size / VRAM_WIDTH); - - m_max_multisamples = 1; - if (GLAD_GL_ARB_texture_storage || GLAD_GL_ES_VERSION_3_2) - { - glGetIntegerv(GL_MAX_SAMPLES, reinterpret_cast(&m_max_multisamples)); - if (m_max_multisamples == 0) - m_max_multisamples = 1; - } - - m_supports_per_sample_shading = GLAD_GL_VERSION_4_0 || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_ARB_sample_shading; - Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported"); - Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); - - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); - Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment); - - if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_EXT_copy_image && !GLAD_GL_ES_VERSION_3_2 && !GLAD_GL_OES_copy_image) - Log_WarningPrintf("GL_EXT/OES_copy_image missing, this may affect performance."); - -#ifdef __APPLE__ - // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver. - m_use_texture_buffer_for_vram_writes = false; -#else - m_use_texture_buffer_for_vram_writes = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2); - - // And Samsung's ANGLE/GLES driver? 
- if (std::strstr(reinterpret_cast(glGetString(GL_RENDERER)), "ANGLE")) - m_use_texture_buffer_for_vram_writes = false; -#endif - m_texture_stream_buffer_size = VRAM_UPDATE_TEXTURE_BUFFER_SIZE; - if (m_use_texture_buffer_for_vram_writes) - { - GLint max_texel_buffer_size; - glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, reinterpret_cast(&max_texel_buffer_size)); - Log_InfoPrintf("Max texel buffer size: %u", max_texel_buffer_size); - if (max_texel_buffer_size < static_cast(VRAM_WIDTH * VRAM_HEIGHT)) - { - Log_WarningPrintf("Maximum texture buffer size is less than VRAM size, not using texel buffers."); - m_use_texture_buffer_for_vram_writes = false; - } - else - { - m_texture_stream_buffer_size = - std::min(VRAM_UPDATE_TEXTURE_BUFFER_SIZE, static_cast(max_texel_buffer_size) * sizeof(u16)); - } - } - - if (!m_use_texture_buffer_for_vram_writes) - { - // Try SSBOs. - GLint max_fragment_storage_blocks = 0; - GLint64 max_ssbo_size = 0; - if (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1 || GLAD_GL_ARB_shader_storage_buffer_object) - { - glGetIntegerv(GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, &max_fragment_storage_blocks); - glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size); - } - - Log_InfoPrintf("Max fragment shader storage blocks: %d", max_fragment_storage_blocks); - Log_InfoPrintf("Max shader storage buffer size: %" PRId64, max_ssbo_size); - m_use_ssbo_for_vram_writes = (max_fragment_storage_blocks > 0 && - max_ssbo_size >= static_cast(VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16))); - if (m_use_ssbo_for_vram_writes) - { - Log_InfoPrintf("Using shader storage buffers for VRAM writes."); - m_texture_stream_buffer_size = - static_cast(std::min(VRAM_UPDATE_TEXTURE_BUFFER_SIZE, static_cast(max_ssbo_size))); - } - else - { - Log_WarningPrintf("Texture buffers and SSBOs are not supported, VRAM writes will be slower and multisampling " - "will be unavailable."); - m_max_multisamples = 1; - m_supports_per_sample_shading = false; - } - } - - int max_dual_source_draw_buffers 
= 0; - glGetIntegerv(GL_MAX_DUAL_SOURCE_DRAW_BUFFERS, &max_dual_source_draw_buffers); - m_supports_dual_source_blend = - (max_dual_source_draw_buffers > 0) && - (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended || GLAD_GL_EXT_blend_func_extended); - - // adaptive smoothing would require texture views, which aren't in GLES. - m_supports_adaptive_downsampling = false; - - // noperspective is not supported in GLSL ES. - m_supports_disable_color_perspective = (g_host_display->GetRenderAPI() == RenderAPI::OpenGL); -} - -bool GPU_HW_OpenGL::CreateFramebuffer() -{ - // scale vram size to internal resolution - const u32 texture_width = VRAM_WIDTH * m_resolution_scale; - const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; - const u32 multisamples = m_multisamples; - - if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, multisamples, GPUTexture::Format::RGBA8, nullptr, 0, - true, true) || - !m_vram_depth_texture.Create(texture_width, texture_height, 1, 1, multisamples, GPUTexture::Format::D16, nullptr, - 0, false, true) || - !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false, - true) || - !m_vram_read_texture.CreateFramebuffer() || - !m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false, - true) || - !m_vram_encoding_texture.CreateFramebuffer() || - !m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, - 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, true, true) || - !m_display_texture.CreateFramebuffer()) - { - return false; - } - - if (m_vram_fbo_id == 0) - glGenFramebuffers(1, &m_vram_fbo_id); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_vram_texture.GetGLTarget(), - m_vram_texture.GetGLId(), 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 
m_vram_depth_texture.GetGLTarget(), - m_vram_depth_texture.GetGLId(), 0); - Assert(glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - if (m_downsample_mode == GPUDownsampleMode::Box) - { - if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8) || - !m_downsample_texture.CreateFramebuffer()) - { - return false; - } - } - - if (m_state_copy_fbo_id == 0) - glGenFramebuffers(1, &m_state_copy_fbo_id); - - SetFullVRAMDirtyRectangle(); - return true; -} - -void GPU_HW_OpenGL::ClearFramebuffer() -{ - const float depth_clear_value = m_pgxp_depth_buffer ? 1.0f : 0.0f; - - glDisable(GL_SCISSOR_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 0.0f); - IsGLES() ? glClearDepthf(depth_clear_value) : glClearDepth(depth_clear_value); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - glEnable(GL_SCISSOR_TEST); - m_last_depth_z = 1.0f; - SetFullVRAMDirtyRectangle(); -} - -bool GPU_HW_OpenGL::CreateVertexBuffer() -{ - m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE); - if (!m_vertex_stream_buffer) - return false; - - m_vertex_stream_buffer->Bind(); - - glGenVertexArrays(1, &m_vao_id); - glBindVertexArray(m_vao_id); - glEnableVertexAttribArray(0); - glEnableVertexAttribArray(1); - glEnableVertexAttribArray(2); - glEnableVertexAttribArray(3); - glEnableVertexAttribArray(4); - glVertexAttribPointer(0, 4, GL_FLOAT, false, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, x))); - glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), - reinterpret_cast(offsetof(BatchVertex, color))); - glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, u))); - glVertexAttribIPointer(3, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), - reinterpret_cast(offsetof(BatchVertex, texpage))); - glVertexAttribPointer(4, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), - reinterpret_cast(offsetof(BatchVertex, uv_limits))); - 
glBindVertexArray(0); - - glGenVertexArrays(1, &m_attributeless_vao_id); - return true; -} - -bool GPU_HW_OpenGL::CreateUniformBuffer() -{ - m_uniform_stream_buffer = GL::StreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE); - if (!m_uniform_stream_buffer) - return false; - - return true; -} - -bool GPU_HW_OpenGL::CreateTextureBuffer() -{ - const GLenum target = - (m_use_ssbo_for_vram_writes ? GL_SHADER_STORAGE_BUFFER : - (m_use_texture_buffer_for_vram_writes ? GL_TEXTURE_BUFFER : GL_PIXEL_UNPACK_BUFFER)); - m_texture_stream_buffer = GL::StreamBuffer::Create(target, m_texture_stream_buffer_size); - if (!m_texture_stream_buffer) - return false; - - if (m_use_texture_buffer_for_vram_writes) - { - glGenTextures(1, &m_texture_buffer_r16ui_texture); - glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); - glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_texture_stream_buffer->GetGLBufferId()); - } - - m_texture_stream_buffer->Unbind(); - return true; -} - -bool GPU_HW_OpenGL::CompilePrograms() -{ - GL::ShaderCache shader_cache; - shader_cache.Open(IsGLES(), EmuFolders::Cache, SHADER_CACHE_VERSION); - - const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout(); - GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); - - ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + (2 * 2) + 1 + 1 + 1 + 1 + 1); - - for (u32 render_mode = 0; render_mode < 4; render_mode++) - { - for (u32 texture_mode = 0; texture_mode < 9; texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); - const std::string batch_vs = 
shadergen.GenerateBatchVertexShader(textured); - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); - - const auto link_callback = [this, textured, use_binding_layout](GL::Program& prog) { - if (!use_binding_layout) - { - prog.BindAttribute(0, "a_pos"); - prog.BindAttribute(1, "a_col0"); - if (textured) - { - prog.BindAttribute(2, "a_texcoord"); - prog.BindAttribute(3, "a_texpage"); - prog.BindAttribute(4, "a_uv_limits"); - } - - if (!IsGLES() || m_supports_dual_source_blend) - { - if (m_supports_dual_source_blend) - { - prog.BindFragDataIndexed(0, "o_col0"); - prog.BindFragDataIndexed(1, "o_col1"); - } - else - { - prog.BindFragData(0, "o_col0"); - } - } - } - }; - - std::optional prog = shader_cache.GetProgram(batch_vs, fs, link_callback); - if (!prog) - return false; - - if (!use_binding_layout) - { - prog->BindUniformBlock("UBOBlock", 1); - if (textured) - { - prog->Bind(); - prog->Uniform1i("samp0", 0); - } - } - - m_render_programs[render_mode][texture_mode][dithering][interlacing] = std::move(*prog); - - progress.Increment(); - } - } - } - } - - for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) - { - for (u8 interlaced = 0; interlaced < 3; interlaced++) - { - const std::string vs = shadergen.GenerateScreenQuadVertexShader(); - const std::string fs = shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24bit), static_cast(interlaced), m_chroma_smoothing); - - std::optional prog = shader_cache.GetProgram(vs, fs, [this, use_binding_layout](GL::Program& prog) { - if (!IsGLES() && !use_binding_layout) - prog.BindFragData(0, "o_col0"); - }); - if (!prog) - return false; - - if (!use_binding_layout) - { - prog->BindUniformBlock("UBOBlock", 1); - prog->Bind(); - prog->Uniform1i("samp0", 0); - } - m_display_programs[depth_24bit][interlaced] = std::move(*prog); - progress.Increment(); - } - } - - for (u8 
wrapped = 0; wrapped < 2; wrapped++) - { - for (u8 interlaced = 0; interlaced < 2; interlaced++) - { - std::optional prog = shader_cache.GetProgram( - shadergen.GenerateScreenQuadVertexShader(), - shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)), - [this, use_binding_layout](GL::Program& prog) { - if (!IsGLES() && !use_binding_layout) - prog.BindFragData(0, "o_col0"); - }); - if (!prog) - return false; - - if (!use_binding_layout) - prog->BindUniformBlock("UBOBlock", 1); - - m_vram_fill_programs[wrapped][interlaced] = std::move(*prog); - progress.Increment(); - } - } - - std::optional prog = - shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), shadergen.GenerateVRAMReadFragmentShader(), - [this, use_binding_layout](GL::Program& prog) { - if (!IsGLES() && !use_binding_layout) - prog.BindFragData(0, "o_col0"); - }); - if (!prog) - return false; - - if (!use_binding_layout) - { - prog->BindUniformBlock("UBOBlock", 1); - prog->Bind(); - prog->Uniform1i("samp0", 0); - } - m_vram_read_program = std::move(*prog); - progress.Increment(); - - prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), shadergen.GenerateVRAMCopyFragmentShader(), - [this, use_binding_layout](GL::Program& prog) { - if (!IsGLES() && !use_binding_layout) - prog.BindFragData(0, "o_col0"); - }); - if (!prog) - return false; - - if (!use_binding_layout) - { - prog->BindUniformBlock("UBOBlock", 1); - prog->Bind(); - prog->Uniform1i("samp0", 0); - } - m_vram_copy_program = std::move(*prog); - progress.Increment(); - - prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), - shadergen.GenerateVRAMUpdateDepthFragmentShader()); - if (!prog) - return false; - - prog->Bind(); - prog->Uniform1i("samp0", 0); - m_vram_update_depth_program = std::move(*prog); - progress.Increment(); - - if (m_use_texture_buffer_for_vram_writes || m_use_ssbo_for_vram_writes) - { - prog = 
shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), - shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes), - [this, use_binding_layout](GL::Program& prog) { - if (!IsGLES() && !use_binding_layout) - prog.BindFragData(0, "o_col0"); - }); - if (!prog) - return false; - - if (!use_binding_layout) - { - prog->BindUniformBlock("UBOBlock", 1); - prog->Bind(); - prog->Uniform1i("samp0", 0); - } - m_vram_write_program = std::move(*prog); - } - - progress.Increment(); - - if (m_downsample_mode == GPUDownsampleMode::Box) - { - prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), - shadergen.GenerateBoxSampleDownsampleFragmentShader(), - [this, use_binding_layout](GL::Program& prog) { - if (!IsGLES() && !use_binding_layout) - prog.BindFragData(0, "o_col0"); - }); - if (!prog) - return false; - - if (!use_binding_layout) - { - prog->Bind(); - prog->Uniform1i("samp0", 0); - } - - m_downsample_program = std::move(*prog); - } - - progress.Increment(); -#undef UPDATE_PROGRESS - - return true; -} - -void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) -{ - const GL::Program& prog = m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)] - [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; - prog.Bind(); - - if (m_current_transparency_mode != m_batch.transparency_mode || m_current_render_mode != render_mode) - { - m_current_transparency_mode = m_batch.transparency_mode; - m_current_render_mode = render_mode; - SetBlendMode(); - } - - SetDepthFunc(); - - glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices); -} - -void GPU_HW_OpenGL::SetBlendMode() -{ - if (UseAlphaBlending(m_current_transparency_mode, m_current_render_mode)) - { - glEnable(GL_BLEND); - glBlendEquationSeparate(m_current_transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ? 
- GL_FUNC_REVERSE_SUBTRACT : - GL_FUNC_ADD, - GL_FUNC_ADD); - if (m_supports_dual_source_blend) - { - glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO); - } - else - { - const float factor = - (m_current_transparency_mode == GPUTransparencyMode::HalfBackgroundPlusHalfForeground) ? 0.5f : 1.0f; - glBlendFuncSeparate(GL_ONE, GL_CONSTANT_ALPHA, GL_ONE, GL_ZERO); - glBlendColor(0.0f, 0.0f, 0.0f, factor); - } - } - else - { - glDisable(GL_BLEND); - } -} - -bool GPU_HW_OpenGL::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height) -{ - if (!m_vram_write_replacement_texture.IsValid()) - { - if (!m_vram_write_replacement_texture.Create(tex->GetWidth(), tex->GetHeight(), 1, 1, 1, - GPUTexture::Format::RGBA8) || - !m_vram_write_replacement_texture.CreateFramebuffer()) - { - m_vram_write_replacement_texture.Destroy(); - return false; - } - } - else - { - m_vram_write_replacement_texture.Replace(tex->GetWidth(), tex->GetHeight(), GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, - tex->GetPixels()); - } - - glDisable(GL_SCISSOR_TEST); - m_vram_write_replacement_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); - - glBlitFramebuffer(0, tex->GetHeight(), tex->GetWidth(), 0, dst_x, dst_y, dst_x + width, dst_y + height, - GL_COLOR_BUFFER_BIT, GL_LINEAR); - - m_vram_read_texture.Bind(); - glEnable(GL_SCISSOR_TEST); - return true; -} - -void GPU_HW_OpenGL::SetDepthFunc() -{ - SetDepthFunc(m_batch.use_depth_buffer ? GL_LEQUAL : (m_batch.check_mask_before_draw ? 
GL_GEQUAL : GL_ALWAYS)); -} - -void GPU_HW_OpenGL::SetDepthFunc(GLenum func) -{ - if (m_current_depth_test == func) - return; - - glDepthFunc(func); - m_current_depth_test = func; -} - -void GPU_HW_OpenGL::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - - const int width = right - left; - const int height = bottom - top; - const int x = left; - const int y = top; - - Log_DebugPrintf("SetScissor: (%d-%d, %d-%d)", x, x + width, y, y + height); - glScissor(x, y, width, height); -} - -void GPU_HW_OpenGL::UploadUniformBuffer(const void* data, u32 data_size) -{ - const GL::StreamBuffer::MappingResult res = m_uniform_stream_buffer->Map(m_uniform_buffer_alignment, data_size); - std::memcpy(res.pointer, data, data_size); - m_uniform_stream_buffer->Unmap(data_size); - - glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_stream_buffer->GetGLBufferId(), res.buffer_offset, data_size); - - m_renderer_stats.num_uniform_buffer_updates++; -} - -void GPU_HW_OpenGL::ClearDisplay() -{ - GPU_HW::ClearDisplay(); - - g_host_display->ClearDisplayTexture(); - - m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); - glDisable(GL_SCISSOR_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glEnable(GL_SCISSOR_TEST); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); -} - -void GPU_HW_OpenGL::UpdateDisplay() -{ - GPU_HW::UpdateDisplay(); - - if (g_settings.debugging.show_vram) - { - if (IsUsingMultisampling()) - { - UpdateVRAMReadTexture(); - - g_host_display->SetDisplayTexture(&m_vram_read_texture, 0, m_vram_read_texture.GetHeight(), - m_vram_read_texture.GetWidth(), m_vram_read_texture.GetHeight()); - } - else - { - g_host_display->SetDisplayTexture(&m_vram_texture, 0, m_vram_texture.GetHeight(), m_vram_texture.GetWidth(), - m_vram_texture.GetHeight()); - } - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / 
static_cast(VRAM_HEIGHT)); - } - else - { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - GetDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) - { - g_host_display->ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) - { - if (IsUsingDownsampling()) - { - DownsampleFramebuffer(m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); - } - else - { - g_host_display->SetDisplayTexture(&m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); - } - } - else - { - glDisable(GL_BLEND); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_DEPTH_TEST); - - m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Bind(); - m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); - m_vram_texture.Bind(); - - if (interlaced 
== InterlacedRenderMode::None && (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_0)) - { - static constexpr std::array attachments = {GL_COLOR_ATTACHMENT0}; - glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, static_cast(attachments.size()), attachments.data()); - } - - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - UploadUniformBuffer(uniforms, sizeof(uniforms)); - m_batch_ubo_dirty = true; - - Assert(scaled_display_width <= m_display_texture.GetWidth() && - scaled_display_height <= m_display_texture.GetHeight()); - - glViewport(0, 0, scaled_display_width, scaled_display_height); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - if (IsUsingDownsampling()) - { - DownsampleFramebuffer(m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - } - else - { - g_host_display->SetDisplayTexture(&m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - } - - // restore state - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); - glBindVertexArray(m_vao_id); - glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - glEnable(GL_DEPTH_TEST); - glEnable(GL_SCISSOR_TEST); - m_vram_read_texture.Bind(); - SetBlendMode(); - SetDepthFunc(); - } - } -} - -void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - { - ReadSoftwareRendererVRAM(x, y, width, height); - return; - } - - // Get bounds with wrap-around handled. 
- const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); - - // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); - m_vram_texture.Bind(); - m_vram_read_program.Bind(); - UploadUniformBuffer(uniforms, sizeof(uniforms)); - glDisable(GL_BLEND); - glDisable(GL_SCISSOR_TEST); - glViewport(0, 0, encoded_width, encoded_height); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - // Readback encoded texture. - m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); - glPixelStorei(GL_PACK_ALIGNMENT, 2); - glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); - glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); - glPixelStorei(GL_PACK_ALIGNMENT, 4); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - RestoreGraphicsAPIState(); -} - -void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - if (IsUsingSoftwareRendererForReadbacks()) - FillSoftwareRendererVRAM(x, y, width, height, color); - - GPU_HW::FillVRAM(x, y, width, height, color); - - const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); - glScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, width * m_resolution_scale, - height * m_resolution_scale); - - // fast path when not using interlaced rendering - const bool wrapped = IsVRAMFillOversized(x, y, width, height); - const bool interlaced = IsInterlacedRenderingEnabled(); - if (!wrapped && !interlaced) - { - const auto [r, g, b, a] = - RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color))); - glClearColor(r, g, b, a); - IsGLES() ? 
glClearDepthf(a) : glClearDepth(a); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - SetScissorFromDrawingArea(); - } - else - { - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); - - m_vram_fill_programs[BoolToUInt8(wrapped)][BoolToUInt8(interlaced)].Bind(); - UploadUniformBuffer(&uniforms, sizeof(uniforms)); - glDisable(GL_BLEND); - SetDepthFunc(GL_ALWAYS); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - RestoreGraphicsAPIState(); - } -} - -void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - if (IsUsingSoftwareRendererForReadbacks()) - UpdateSoftwareRendererVRAM(x, y, width, height, data, set_mask, check_mask); - - const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, set_mask, check_mask); - - if (!check_mask) - { - const TextureReplacementTexture* rtex = g_texture_replacements.GetVRAMWriteReplacement(width, height, data); - if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, - width * m_resolution_scale, height * m_resolution_scale)) - { - return; - } - } - - const u32 num_pixels = width * height; - if (m_use_texture_buffer_for_vram_writes || m_use_ssbo_for_vram_writes) - { - const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16)); - std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); - m_texture_stream_buffer->Unmap(num_pixels * sizeof(u16)); - m_texture_stream_buffer->Unbind(); - - glDisable(GL_BLEND); - SetDepthFunc((check_mask && !m_pgxp_depth_buffer) ? 
GL_GEQUAL : GL_ALWAYS); - - m_vram_write_program.Bind(); - if (m_use_ssbo_for_vram_writes) - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_texture_stream_buffer->GetGLBufferId()); - else - glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); - - const VRAMWriteUBOData uniforms = - GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask); - UploadUniformBuffer(&uniforms, sizeof(uniforms)); - - // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; - glScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - RestoreGraphicsAPIState(); - } - else - { - if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT || check_mask) - { - // CPU round trip if oversized for now. - Log_WarningPrintf("Oversized/masked VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); - return; - } - - GPU_HW::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); - - const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); - - const u32 source_stride = width * sizeof(u16); - const u8* source_ptr = static_cast(data); - const u16 mask_or = set_mask ? 
0x8000 : 0x0000; - u32* dest_ptr = static_cast(map_result.pointer); - for (u32 row = 0; row < height; row++) - { - const u8* source_row_ptr = source_ptr; - - for (u32 col = 0; col < width; col++) - { - u16 src_col; - std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); - source_row_ptr += sizeof(src_col); - *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(src_col | mask_or); - } - - source_ptr += source_stride; - } - - m_texture_stream_buffer->Unmap(num_pixels * sizeof(u32)); - m_texture_stream_buffer->Bind(); - - // have to write to the 1x texture first - if (m_resolution_scale > 1) - m_vram_encoding_texture.Bind(); - else - m_vram_texture.Bind(); - - // update texture data - glTexSubImage2D(m_vram_texture.GetGLTarget(), 0, x, y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, - reinterpret_cast(static_cast(map_result.buffer_offset))); - m_texture_stream_buffer->Unbind(); - - if (m_resolution_scale > 1) - { - // scale to internal resolution - const u32 scaled_width = width * m_resolution_scale; - const u32 scaled_height = height * m_resolution_scale; - const u32 scaled_x = x * m_resolution_scale; - const u32 scaled_y = y * m_resolution_scale; - glDisable(GL_SCISSOR_TEST); - m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); - glBlitFramebuffer(x, y, x + width, y + height, scaled_x, scaled_y, scaled_x + scaled_width, - scaled_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glEnable(GL_SCISSOR_TEST); - } - } -} - -void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); - - const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); - const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const bool src_dirty = m_vram_dirty_rect.Intersects(src_bounds); - - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) - { - if (src_dirty) 
- UpdateVRAMReadTexture(); - IncludeVRAMDirtyRectangle(dst_bounds); - - const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); - UploadUniformBuffer(&uniforms, sizeof(uniforms)); - - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - SetDepthFunc((m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) ? GL_GEQUAL : GL_ALWAYS); - - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); - glViewport(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), - dst_bounds_scaled.GetHeight()); - m_vram_read_texture.Bind(); - m_vram_copy_program.Bind(); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - RestoreGraphicsAPIState(); - - if (m_GPUSTAT.check_mask_before_draw) - m_current_depth++; - - return; - } - - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); - - src_x *= m_resolution_scale; - src_y *= m_resolution_scale; - dst_x *= m_resolution_scale; - dst_y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - - if (GLAD_GL_VERSION_4_3) - { - glCopyImageSubData(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, src_x, src_y, 0, - m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, dst_x, dst_y, 0, width, height, 1); - } - else if (GLAD_GL_EXT_copy_image) - { - glCopyImageSubDataEXT(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, src_x, src_y, 0, - m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, dst_x, dst_y, 0, width, height, 1); - } - else if (GLAD_GL_OES_copy_image) - { - glCopyImageSubDataOES(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, src_x, src_y, 0, - m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, dst_x, dst_y, 0, width, height, 1); - } - else - { - // glBlitFramebufer with same source/destination should be legal, but on Mali (at least Bifrost) it breaks. - // So, blit from the shadow texture, like in the other renderers. 
- if (src_dirty) - UpdateVRAMReadTexture(); - - glDisable(GL_SCISSOR_TEST); - m_vram_read_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); - glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, - GL_COLOR_BUFFER_BIT, GL_NEAREST); - glEnable(GL_SCISSOR_TEST); - } - - IncludeVRAMDirtyRectangle(dst_bounds); -} - -void GPU_HW_OpenGL::UpdateVRAMReadTexture() -{ - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; - const u32 width = scaled_rect.GetWidth(); - const u32 height = scaled_rect.GetHeight(); - const u32 x = scaled_rect.left; - const u32 y = scaled_rect.top; - const bool multisampled = m_vram_texture.IsMultisampled(); - - if (!multisampled && GLAD_GL_VERSION_4_3) - { - glCopyImageSubData(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, - m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1); - } - else if (!multisampled && GLAD_GL_EXT_copy_image) - { - glCopyImageSubDataEXT(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, - m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1); - } - else if (!multisampled && GLAD_GL_OES_copy_image) - { - glCopyImageSubDataOES(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, - m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1); - } - else - { - m_vram_read_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id); - glDisable(GL_SCISSOR_TEST); - glBlitFramebuffer(x, y, x + width, y + height, x, y, x + width, y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glEnable(GL_SCISSOR_TEST); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); - } - - GPU_HW::UpdateVRAMReadTexture(); -} - -void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit() -{ - if (m_pgxp_depth_buffer) - return; - - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glColorMask(GL_FALSE, 
GL_FALSE, GL_FALSE, GL_FALSE); - glDepthFunc(GL_ALWAYS); - - m_vram_texture.Bind(); - m_vram_update_depth_program.Bind(); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindVertexArray(m_vao_id); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glEnable(GL_SCISSOR_TEST); - - m_vram_read_texture.Bind(); -} - -void GPU_HW_OpenGL::ClearDepthBuffer() -{ - glDisable(GL_SCISSOR_TEST); - IsGLES() ? glClearDepthf(1.0f) : glClearDepth(1.0f); - glClear(GL_DEPTH_BUFFER_BIT); - glEnable(GL_SCISSOR_TEST); - m_last_depth_z = 1.0f; -} - -void GPU_HW_OpenGL::DownsampleFramebuffer(GL::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - DebugAssert(m_downsample_mode != GPUDownsampleMode::Adaptive); - DownsampleFramebufferBoxFilter(source, left, top, width, height); -} - -void GPU_HW_OpenGL::DownsampleFramebufferBoxFilter(GL::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - const u32 ds_left = left / m_resolution_scale; - const u32 ds_top = top / m_resolution_scale; - const u32 ds_width = width / m_resolution_scale; - const u32 ds_height = height / m_resolution_scale; - - glDisable(GL_BLEND); - glDisable(GL_DEPTH_TEST); - glDisable(GL_SCISSOR_TEST); - glViewport(ds_left, ds_top, ds_width, ds_height); - glBindVertexArray(m_attributeless_vao_id); - source.Bind(); - m_downsample_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); - m_downsample_program.Bind(); - glDrawArrays(GL_TRIANGLES, 0, 3); - - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(&m_downsample_texture, ds_left, ds_top, ds_width, ds_height); -} - -std::unique_ptr GPU::CreateHardwareOpenGLRenderer() -{ - // Don't re-request GL when we already have GLES here... - const RenderAPI current_api = g_host_display ? 
g_host_display->GetRenderAPI() : RenderAPI::None; - if (current_api != RenderAPI::OpenGL && current_api != RenderAPI::OpenGLES && - !Host::AcquireHostDisplay(RenderAPI::OpenGL)) - { - Log_ErrorPrintf("Host render API type is incompatible"); - return nullptr; - } - - const bool opengl_is_available = ((g_host_display->GetRenderAPI() == RenderAPI::OpenGL && - (GLAD_GL_VERSION_3_0 || GLAD_GL_ARB_uniform_buffer_object)) || - (g_host_display->GetRenderAPI() == RenderAPI::OpenGLES && GLAD_GL_ES_VERSION_3_1)); - if (!opengl_is_available) - { - Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", - "OpenGL renderer unavailable, your driver or hardware is not " - "recent enough. OpenGL 3.1 or OpenGL ES 3.1 is required."), - 20.0f); - return nullptr; - } - - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize()) - return nullptr; - - return gpu; -} diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h deleted file mode 100644 index b7a091bfc..000000000 --- a/src/core/gpu_hw_opengl.h +++ /dev/null @@ -1,121 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/gl/loader.h" -#include "common/gl/program.h" -#include "common/gl/shader_cache.h" -#include "common/gl/stream_buffer.h" -#include "common/gl/texture.h" -#include "gpu_hw.h" -#include "texture_replacements.h" -#include -#include -#include - -class GPU_HW_OpenGL final : public GPU_HW -{ -public: - GPU_HW_OpenGL(); - ~GPU_HW_OpenGL() override; - - GPURenderer GetRendererType() const override; - - bool Initialize() override; - void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; - - void ResetGraphicsAPIState() override; - void RestoreGraphicsAPIState() override; - void UpdateSettings() override; - -protected: - void ClearDisplay() override; - void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) 
override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateVRAMReadTexture() override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; - -private: - struct GLStats - { - u32 num_batches; - u32 num_vertices; - u32 num_vram_reads; - u32 num_vram_writes; - u32 num_vram_read_texture_updates; - u32 num_uniform_buffer_updates; - }; - - ALWAYS_INLINE bool IsGLES() const { return (m_render_api == RenderAPI::OpenGLES); } - - void SetCapabilities(); - bool CreateFramebuffer(); - void ClearFramebuffer(); - void CopyFramebufferForState(GLenum target, GLuint src_texture, u32 src_fbo, u32 src_x, u32 src_y, GLuint dst_texture, - u32 dst_fbo, u32 dst_x, u32 dst_y, u32 width, u32 height); - - bool CreateVertexBuffer(); - bool CreateUniformBuffer(); - bool CreateTextureBuffer(); - - bool CompilePrograms(); - - void SetDepthFunc(); - void SetDepthFunc(GLenum func); - void SetBlendMode(); - - bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); - void DownsampleFramebuffer(GL::Texture& source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferBoxFilter(GL::Texture& source, u32 left, u32 top, u32 width, u32 height); - - // downsample texture - used for readbacks at >1xIR. 
- GL::Texture m_vram_texture; - GL::Texture m_vram_depth_texture; - GL::Texture m_vram_read_texture; - GL::Texture m_vram_encoding_texture; - GL::Texture m_display_texture; - GL::Texture m_vram_write_replacement_texture; - - std::unique_ptr m_vertex_stream_buffer; - GLuint m_vram_fbo_id = 0; - GLuint m_vao_id = 0; - GLuint m_attributeless_vao_id = 0; - GLuint m_state_copy_fbo_id = 0; - - std::unique_ptr m_uniform_stream_buffer; - - std::unique_ptr m_texture_stream_buffer; - GLuint m_texture_buffer_r16ui_texture = 0; - - std::array, 2>, 9>, 4> - m_render_programs; // [render_mode][texture_mode][dithering][interlacing] - std::array, 2> m_display_programs; // [depth_24][interlaced] - std::array, 2> m_vram_fill_programs; - GL::Program m_vram_read_program; - GL::Program m_vram_write_program; - GL::Program m_vram_copy_program; - GL::Program m_vram_update_depth_program; - - u32 m_uniform_buffer_alignment = 1; - u32 m_texture_stream_buffer_size = 0; - - bool m_use_texture_buffer_for_vram_writes = false; - bool m_use_ssbo_for_vram_writes = false; - - GLenum m_current_depth_test = 0; - GPUTransparencyMode m_current_transparency_mode = GPUTransparencyMode::Disabled; - BatchRenderMode m_current_render_mode = BatchRenderMode::TransparencyDisabled; - - GL::Texture m_downsample_texture; - GL::Program m_downsample_program; -}; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index cf0103d22..bc32c2ed4 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1162,6 +1162,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo) ss << "layout(std430"; if (IsVulkan()) ss << ", set = 0, binding = 0"; + else if (IsMetal()) + ss << ", set = 0, binding = 1"; else if (m_use_glsl_binding_layout) ss << ", binding = 0"; @@ -1327,13 +1329,37 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader() return ss.str(); } +void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss) +{ 
+ DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true); +} + +std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader() +{ + std::stringstream ss; + WriteHeader(ss); + WriteAdaptiveDownsampleUniformBuffer(ss); + DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); + ss << R"( +{ + v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u)); + v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); + v_tex0 = u_uv_min + (u_uv_max - u_uv_min) * v_tex0; + #if API_OPENGL || API_OPENGL_ES || API_VULKAN + v_pos.y = -v_pos.y; + #endif +} +)"; + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass) { std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); + WriteAdaptiveDownsampleUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution"}, true); DefineMacro(ss, "FIRST_PASS", first_pass); // mipmap_energy.glsl ported from parallel-rsx. 
@@ -1368,16 +1394,16 @@ float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11) { float2 uv = v_tex0 - (u_rcp_resolution * 0.25); #ifdef FIRST_PASS - vec3 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)).rgb; - vec3 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)).rgb; - vec3 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)).rgb; - vec3 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)).rgb; + vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb; + vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb; + vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb; + vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb; o_col0 = get_bias(c00, c01, c10, c11); #else - vec4 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)); - vec4 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)); - vec4 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)); - vec4 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)); + vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)); + vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)); + vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)); + vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)); o_col0 = get_bias(c00, c01, c10, c11); #endif } @@ -1391,9 +1417,8 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader() std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); + WriteAdaptiveDownsampleUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float sample_level"}, - true); // mipmap_blur.glsl ported from parallel-rsx. 
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false); diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 248fb12cd..8254c8fe3 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -24,6 +24,7 @@ public: std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced); std::string GenerateVRAMUpdateDepthFragmentShader(); + std::string GenerateAdaptiveDownsampleVertexShader(); std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); std::string GenerateAdaptiveDownsampleBlurFragmentShader(); std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); @@ -36,6 +37,7 @@ private: void WriteCommonFunctions(std::stringstream& ss); void WriteBatchUniformBuffer(std::stringstream& ss); void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter); + void WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss); u32 m_resolution_scale; u32 m_multisamples; diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp deleted file mode 100644 index 0fe0d67a0..000000000 --- a/src/core/gpu_hw_vulkan.cpp +++ /dev/null @@ -1,1996 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "gpu_hw_vulkan.h" -#include "common/assert.h" -#include "common/log.h" -#include "common/scoped_guard.h" -#include "common/timer.h" -#include "common/vulkan/builders.h" -#include "common/vulkan/context.h" -#include "common/vulkan/shader_cache.h" -#include "common/vulkan/util.h" -#include "gpu_hw_shadergen.h" -#include "util/host_display.h" -#include "system.h" -#include "util/state_wrapper.h" -Log_SetChannel(GPU_HW_Vulkan); - -GPU_HW_Vulkan::GPU_HW_Vulkan() = default; - -GPU_HW_Vulkan::~GPU_HW_Vulkan() -{ - g_host_display->ClearDisplayTexture(); - DestroyResources(); -} - -GPURenderer GPU_HW_Vulkan::GetRendererType() const -{ - return GPURenderer::HardwareVulkan; -} - -bool 
GPU_HW_Vulkan::Initialize() -{ - SetCapabilities(); - - if (!GPU_HW::Initialize()) - return false; - - if (!CreatePipelineLayouts()) - { - Log_ErrorPrintf("Failed to create pipeline layouts"); - return false; - } - - if (!CreateSamplers()) - { - Log_ErrorPrintf("Failed to create samplers"); - return false; - } - - if (!CreateVertexBuffer()) - { - Log_ErrorPrintf("Failed to create vertex buffer"); - return false; - } - - if (!CreateUniformBuffer()) - { - Log_ErrorPrintf("Failed to create uniform buffer"); - return false; - } - - if (!CreateTextureBuffer()) - { - Log_ErrorPrintf("Failed to create texture buffer"); - return false; - } - - if (!CreateFramebuffer()) - { - Log_ErrorPrintf("Failed to create framebuffer"); - return false; - } - - if (!CompilePipelines()) - { - Log_ErrorPrintf("Failed to compile pipelines"); - return false; - } - - UpdateDepthBufferFromMaskBit(); - RestoreGraphicsAPIState(); - return true; -} - -void GPU_HW_Vulkan::Reset(bool clear_vram) -{ - GPU_HW::Reset(clear_vram); - - EndRenderPass(); - - if (clear_vram) - ClearFramebuffer(); -} - -bool GPU_HW_Vulkan::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) -{ - if (host_texture) - { - EndRenderPass(); - - const VkImageCopy ic{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {0, 0, 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {0, 0, 0}, - {m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 1u}}; - - VkCommandBuffer buf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(buf, "GPU_HW_Vulkan::DoState"); - - if (sw.IsReading()) - { - Vulkan::Texture* tex = static_cast(*host_texture); - if (tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != m_vram_texture.GetSamples()) - { - return false; - } - - const VkImageLayout old_tex_layout = tex->GetLayout(); - const VkImageLayout old_vram_layout = m_vram_texture.GetLayout(); - tex->TransitionToLayout(buf, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_vram_texture.TransitionToLayout(buf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdCopyImage(g_vulkan_context->GetCurrentCommandBuffer(), tex->GetImage(), tex->GetLayout(), - m_vram_texture.GetImage(), m_vram_texture.GetLayout(), 1, &ic); - m_vram_texture.TransitionToLayout(buf, old_vram_layout); - tex->TransitionToLayout(buf, old_tex_layout); - } - else - { - Vulkan::Texture* tex = static_cast(*host_texture); - if (!tex || tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != static_cast(m_vram_texture.GetSamples())) - { - delete tex; - - tex = static_cast(g_host_display - ->CreateTexture(m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 1, - 1, m_vram_texture.GetSamples(), GPUTexture::Format::RGBA8, - nullptr, 0, false) - .release()); - *host_texture = tex; - if (!tex) - return false; - } - - if (tex->GetWidth() != m_vram_texture.GetWidth() || tex->GetHeight() != m_vram_texture.GetHeight() || - tex->GetSamples() != m_vram_texture.GetSamples()) - { - return false; - } - - const VkImageLayout old_vram_layout = m_vram_texture.GetLayout(); - tex->TransitionToLayout(buf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - m_vram_texture.TransitionToLayout(buf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - vkCmdCopyImage(g_vulkan_context->GetCurrentCommandBuffer(), m_vram_texture.GetImage(), m_vram_texture.GetLayout(), - tex->GetImage(), tex->GetLayout(), 1, &ic); - m_vram_texture.TransitionToLayout(buf, old_vram_layout); - tex->TransitionToLayout(buf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - } - - return GPU_HW::DoState(sw, host_texture, update_display); -} - -void GPU_HW_Vulkan::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); - - EndRenderPass(); - - if (g_host_display->GetDisplayTextureHandle() == &m_vram_texture) - { - m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - 
- // this is called at the end of the frame, so the UBO is associated with the previous command buffer. - m_batch_ubo_dirty = true; -} - -void GPU_HW_Vulkan::RestoreGraphicsAPIState() -{ - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::RestoreGraphicsAPIState"); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - VkDeviceSize vertex_buffer_offset = 0; - vkCmdBindVertexBuffers(cmdbuf, 0, 1, m_vertex_stream_buffer.GetBufferPointer(), &vertex_buffer_offset); - Vulkan::Util::SetViewport(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_batch_pipeline_layout, 0, 1, - &m_batch_descriptor_set, 1, &m_current_uniform_buffer_offset); - SetScissorFromDrawingArea(); -} - -void GPU_HW_Vulkan::UpdateSettings() -{ - GPU_HW::UpdateSettings(); - - bool framebuffer_changed, shaders_changed; - UpdateHWSettings(&framebuffer_changed, &shaders_changed); - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); - } - - // Everything should be finished executing before recreating resources. 
- g_host_display->ClearDisplayTexture(); - g_vulkan_context->ExecuteCommandBuffer(true); - - if (framebuffer_changed) - CreateFramebuffer(); - - if (shaders_changed) - { - // clear it since we draw a loading screen and it's not in the correct state - DestroyPipelines(); - CompilePipelines(); - } - - // this has to be done here, because otherwise we're using destroyed pipelines in the same cmdbuffer - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); - UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); - ResetGraphicsAPIState(); - } -} - -void GPU_HW_Vulkan::MapBatchVertexPointer(u32 required_vertices) -{ - DebugAssert(!m_batch_start_vertex_ptr); - - const u32 required_space = required_vertices * sizeof(BatchVertex); - if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes in vertex stream buffer", required_space); - ExecuteCommandBuffer(false, true); - if (!m_vertex_stream_buffer.ReserveMemory(required_space, sizeof(BatchVertex))) - Panic("Failed to reserve vertex stream buffer memory"); - } - - m_batch_start_vertex_ptr = reinterpret_cast(m_vertex_stream_buffer.GetCurrentHostPointer()); - m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + (m_vertex_stream_buffer.GetCurrentSpace() / sizeof(BatchVertex)); - m_batch_base_vertex = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(BatchVertex); -} - -void GPU_HW_Vulkan::UnmapBatchVertexPointer(u32 used_vertices) -{ - DebugAssert(m_batch_start_vertex_ptr); - if (used_vertices > 0) - m_vertex_stream_buffer.CommitMemory(used_vertices * sizeof(BatchVertex)); - - m_batch_start_vertex_ptr = nullptr; - m_batch_end_vertex_ptr = nullptr; - m_batch_current_vertex_ptr = nullptr; -} - -void GPU_HW_Vulkan::UploadUniformBuffer(const void* data, u32 data_size) -{ - const u32 alignment = 
static_cast(g_vulkan_context->GetUniformBufferAlignment()); - if (!m_uniform_stream_buffer.ReserveMemory(data_size, alignment)) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes in uniform stream buffer", data_size); - ExecuteCommandBuffer(false, true); - if (!m_uniform_stream_buffer.ReserveMemory(data_size, alignment)) - Panic("Failed to reserve uniform stream buffer memory"); - } - - m_current_uniform_buffer_offset = m_uniform_stream_buffer.GetCurrentOffset(); - std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), data, data_size); - m_uniform_stream_buffer.CommitMemory(data_size); - - vkCmdBindDescriptorSets(g_vulkan_context->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, - m_batch_pipeline_layout, 0, 1, &m_batch_descriptor_set, 1, &m_current_uniform_buffer_offset); -} - -void GPU_HW_Vulkan::SetCapabilities() -{ - const u32 max_texture_size = g_vulkan_context->GetDeviceLimits().maxImageDimension2D; - const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; - Log_InfoPrintf("Max texture size: %ux%u", max_texture_size, max_texture_size); - m_max_resolution_scale = max_texture_scale; - - VkImageFormatProperties color_properties = {}; - vkGetPhysicalDeviceImageFormatProperties(g_vulkan_context->GetPhysicalDevice(), VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &color_properties); - VkImageFormatProperties depth_properties = {}; - vkGetPhysicalDeviceImageFormatProperties(g_vulkan_context->GetPhysicalDevice(), VK_FORMAT_D32_SFLOAT, - VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, &depth_properties); - const VkSampleCountFlags combined_properties = - g_vulkan_context->GetDeviceProperties().limits.framebufferColorSampleCounts & - g_vulkan_context->GetDeviceProperties().limits.framebufferDepthSampleCounts & color_properties.sampleCounts & - depth_properties.sampleCounts; - if (combined_properties & 
VK_SAMPLE_COUNT_64_BIT) - m_max_multisamples = 64; - else if (combined_properties & VK_SAMPLE_COUNT_32_BIT) - m_max_multisamples = 32; - else if (combined_properties & VK_SAMPLE_COUNT_16_BIT) - m_max_multisamples = 16; - else if (combined_properties & VK_SAMPLE_COUNT_8_BIT) - m_max_multisamples = 8; - else if (combined_properties & VK_SAMPLE_COUNT_4_BIT) - m_max_multisamples = 4; - else if (combined_properties & VK_SAMPLE_COUNT_2_BIT) - m_max_multisamples = 2; - else - m_max_multisamples = 1; - - m_supports_dual_source_blend = g_vulkan_context->GetDeviceFeatures().dualSrcBlend; - m_supports_per_sample_shading = g_vulkan_context->GetDeviceFeatures().sampleRateShading; - m_supports_adaptive_downsampling = true; - m_supports_disable_color_perspective = true; - - Log_InfoPrintf("Dual-source blend: %s", m_supports_dual_source_blend ? "supported" : "not supported"); - Log_InfoPrintf("Per-sample shading: %s", m_supports_per_sample_shading ? "supported" : "not supported"); - Log_InfoPrintf("Max multisamples: %u", m_max_multisamples); - -#ifdef __APPLE__ - // Partial texture buffer uploads appear to be broken in macOS/MoltenVK. - m_use_ssbos_for_vram_writes = true; -#else - const u32 max_texel_buffer_elements = g_vulkan_context->GetDeviceLimits().maxTexelBufferElements; - Log_InfoPrintf("Max texel buffer elements: %u", max_texel_buffer_elements); - if (max_texel_buffer_elements < (VRAM_WIDTH * VRAM_HEIGHT)) - { - Log_WarningPrintf("Texel buffer elements insufficient, using shader storage buffers instead."); - m_use_ssbos_for_vram_writes = true; - } -#endif -} - -void GPU_HW_Vulkan::DestroyResources() -{ - // Everything should be finished executing before recreating resources. 
- if (g_vulkan_context) - g_vulkan_context->ExecuteCommandBuffer(true); - - DestroyFramebuffer(); - DestroyPipelines(); - - Vulkan::Util::SafeDestroyPipelineLayout(m_downsample_pipeline_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_downsample_composite_descriptor_set_layout); - Vulkan::Util::SafeDestroyPipelineLayout(m_downsample_composite_pipeline_layout); - - Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_write_descriptor_set); - Vulkan::Util::SafeDestroyBufferView(m_texture_stream_buffer_view); - - m_vertex_stream_buffer.Destroy(false); - m_uniform_stream_buffer.Destroy(false); - m_texture_stream_buffer.Destroy(false); - - Vulkan::Util::SafeDestroyPipelineLayout(m_vram_write_pipeline_layout); - Vulkan::Util::SafeDestroyPipelineLayout(m_single_sampler_pipeline_layout); - Vulkan::Util::SafeDestroyPipelineLayout(m_no_samplers_pipeline_layout); - Vulkan::Util::SafeDestroyPipelineLayout(m_batch_pipeline_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_write_descriptor_set_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_single_sampler_descriptor_set_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_batch_descriptor_set_layout); - Vulkan::Util::SafeDestroySampler(m_point_sampler); - Vulkan::Util::SafeDestroySampler(m_linear_sampler); - Vulkan::Util::SafeDestroySampler(m_trilinear_sampler); -} - -void GPU_HW_Vulkan::BeginRenderPass(VkRenderPass render_pass, VkFramebuffer framebuffer, u32 x, u32 y, u32 width, - u32 height, const VkClearValue* clear_value /* = nullptr */) -{ - DebugAssert(m_current_render_pass == VK_NULL_HANDLE); - - const VkRenderPassBeginInfo bi = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - render_pass, - framebuffer, - {{static_cast(x), static_cast(y)}, {width, height}}, - (clear_value ? 
1u : 0u), - clear_value}; - Vulkan::Util::BeginDebugScope(g_vulkan_context->GetCurrentCommandBuffer(), "GPU_HW_Vulkan::BeginRenderPass"); - vkCmdBeginRenderPass(g_vulkan_context->GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); - m_current_render_pass = render_pass; -} - -void GPU_HW_Vulkan::BeginVRAMRenderPass() -{ - if (m_current_render_pass == m_vram_render_pass) - return; - - EndRenderPass(); - BeginRenderPass(m_vram_render_pass, m_vram_framebuffer, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); -} - -void GPU_HW_Vulkan::EndRenderPass() -{ - if (m_current_render_pass == VK_NULL_HANDLE) - return; - - vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer()); - Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer()); - m_current_render_pass = VK_NULL_HANDLE; -} - -void GPU_HW_Vulkan::ExecuteCommandBuffer(bool wait_for_completion, bool restore_state) -{ - EndRenderPass(); - g_vulkan_context->ExecuteCommandBuffer(wait_for_completion); - m_batch_ubo_dirty = true; - if (restore_state) - RestoreGraphicsAPIState(); -} - -bool GPU_HW_Vulkan::CreatePipelineLayouts() -{ - VkDevice device = g_vulkan_context->GetDevice(); - - Vulkan::DescriptorSetLayoutBuilder dslbuilder; - dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT); - dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_batch_descriptor_set_layout = dslbuilder.Create(device); - if (m_batch_descriptor_set_layout == VK_NULL_HANDLE) - return false; - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_batch_descriptor_set_layout, - "Batch Descriptor Set Layout"); - - // textures start at 1 - dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_single_sampler_descriptor_set_layout = dslbuilder.Create(device); - if (m_single_sampler_descriptor_set_layout == VK_NULL_HANDLE) - 
return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_single_sampler_descriptor_set_layout, - "Single Sampler Descriptor Set Layout"); - - if (m_use_ssbos_for_vram_writes) - dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - else - dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_vram_write_descriptor_set_layout = dslbuilder.Create(device); - if (m_vram_write_descriptor_set_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_write_descriptor_set_layout, - "VRAM Write Descriptor Set Layout"); - - Vulkan::PipelineLayoutBuilder plbuilder; - plbuilder.AddDescriptorSet(m_batch_descriptor_set_layout); - m_batch_pipeline_layout = plbuilder.Create(device); - if (m_batch_pipeline_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_batch_pipeline_layout, "Batch Pipeline Layout"); - - plbuilder.AddDescriptorSet(m_single_sampler_descriptor_set_layout); - plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); - m_single_sampler_pipeline_layout = plbuilder.Create(device); - if (m_single_sampler_pipeline_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_single_sampler_pipeline_layout, - "Single Sampler Pipeline Layout"); - - plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); - m_no_samplers_pipeline_layout = plbuilder.Create(device); - if (m_no_samplers_pipeline_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_no_samplers_pipeline_layout, - "No Samplers Pipeline Layout"); - - plbuilder.AddDescriptorSet(m_vram_write_descriptor_set_layout); - plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); - m_vram_write_pipeline_layout = 
plbuilder.Create(device); - if (m_vram_write_pipeline_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_write_pipeline_layout, - "VRAM Write Pipeline Layout"); - - plbuilder.AddDescriptorSet(m_single_sampler_descriptor_set_layout); - plbuilder.AddPushConstants(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); - m_downsample_pipeline_layout = plbuilder.Create(device); - if (m_downsample_pipeline_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_pipeline_layout, - "Downsample Pipeline Layout"); - - dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - dslbuilder.AddBinding(2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_downsample_composite_descriptor_set_layout = dslbuilder.Create(device); - if (m_downsample_composite_descriptor_set_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), - - m_downsample_composite_descriptor_set_layout, - "Downsample Composite Descriptor Set Layout"); - - plbuilder.AddDescriptorSet(m_downsample_composite_descriptor_set_layout); - plbuilder.AddPushConstants(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE); - m_downsample_composite_pipeline_layout = plbuilder.Create(device); - if (m_downsample_composite_pipeline_layout == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pipeline_layout, - "Downsample Composite Pipeline Layout"); - - return true; -} - -bool GPU_HW_Vulkan::CreateSamplers() -{ - VkDevice device = g_vulkan_context->GetDevice(); - - Vulkan::SamplerBuilder sbuilder; - sbuilder.SetPointSampler(VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - sbuilder.SetAddressMode(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - 
VK_SAMPLER_ADDRESS_MODE_REPEAT); - m_point_sampler = sbuilder.Create(device); - if (m_point_sampler == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_point_sampler, "Point Sampler"); - - sbuilder.SetLinearSampler(false, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - sbuilder.SetAddressMode(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - VK_SAMPLER_ADDRESS_MODE_REPEAT); - m_linear_sampler = sbuilder.Create(device); - if (m_linear_sampler == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_linear_sampler, "Linear Sampler"); - - sbuilder.SetLinearSampler(true, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - m_trilinear_sampler = sbuilder.Create(device); - if (m_trilinear_sampler == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_trilinear_sampler, "Trilinear Sampler"); - - return true; -} - -bool GPU_HW_Vulkan::CreateFramebuffer() -{ - DestroyFramebuffer(); - - // scale vram size to internal resolution - const u32 texture_width = VRAM_WIDTH * m_resolution_scale; - const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; - const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM; - const VkFormat depth_format = VK_FORMAT_D16_UNORM; - const VkSampleCountFlagBits samples = static_cast(m_multisamples); - - if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, - true) || - !m_vram_depth_texture.Create(texture_width, texture_height, 1, 1, depth_format, samples, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, - 
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, true) || - !m_display_texture.Create( - ((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) * m_resolution_scale, - GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT, - true) || - !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, true)) - { - return false; - } - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_texture.GetImage(), "VRAM Texture"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_texture.GetView(), "VRAM Texture View"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_texture.GetAllocation(), "VRAM Texture Memory"); - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_depth_texture.GetImage(), "VRAM Depth Texture"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_depth_texture.GetView(), "VRAM Depth Texture View"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_depth_texture.GetAllocation(), - "VRAM Depth Texture Memory"); - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_read_texture.GetImage(), "VRAM Read Texture"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_read_texture.GetView(), "VRAM Read Texture View"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_read_texture.GetAllocation(), - "VRAM Read Texture Memory"); - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_display_texture.GetImage(), "Display Texture"); - 
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_display_texture.GetView(), "Display Texture View"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_display_texture.GetAllocation(), - "Display Texture Memory"); - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_readback_texture.GetImage(), - "VRAM Readback Texture"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_readback_texture.GetView(), - "VRAM Readback Texture View"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_readback_texture.GetAllocation(), - "VRAM Readback Texture Memory"); - - m_vram_render_pass = - g_vulkan_context->GetRenderPass(texture_format, depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); - m_vram_update_depth_render_pass = - g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - m_display_load_render_pass = g_vulkan_context->GetRenderPass( - m_display_texture.GetVkFormat(), VK_FORMAT_UNDEFINED, m_display_texture.GetVkSamples(), VK_ATTACHMENT_LOAD_OP_LOAD); - m_display_discard_render_pass = - g_vulkan_context->GetRenderPass(m_display_texture.GetVkFormat(), VK_FORMAT_UNDEFINED, - m_display_texture.GetVkSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE); - m_vram_readback_render_pass = - g_vulkan_context->GetRenderPass(m_vram_readback_texture.GetVkFormat(), VK_FORMAT_UNDEFINED, - m_vram_readback_texture.GetVkSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE); - - if (m_vram_render_pass == VK_NULL_HANDLE || m_vram_update_depth_render_pass == VK_NULL_HANDLE || - m_display_load_render_pass == VK_NULL_HANDLE || m_vram_readback_render_pass == VK_NULL_HANDLE) - { - return false; - } - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_render_pass, "VRAM Render Pass"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_update_depth_render_pass, - "VRAM Update Depth Render Pass"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), 
m_display_load_render_pass, "Display Load Render Pass"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_readback_render_pass, "VRAM Readback Render Pass"); - - // vram framebuffer has both colour and depth - Vulkan::FramebufferBuilder fbb; - fbb.AddAttachment(m_vram_texture.GetView()); - fbb.AddAttachment(m_vram_depth_texture.GetView()); - fbb.SetRenderPass(m_vram_render_pass); - fbb.SetSize(m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), m_vram_texture.GetLayers()); - m_vram_framebuffer = fbb.Create(g_vulkan_context->GetDevice()); - if (m_vram_framebuffer == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_framebuffer, "VRAM Framebuffer"); - - m_vram_update_depth_framebuffer = m_vram_depth_texture.CreateFramebuffer(m_vram_update_depth_render_pass); - m_vram_readback_framebuffer = m_vram_readback_texture.CreateFramebuffer(m_vram_readback_render_pass); - m_display_framebuffer = m_display_texture.CreateFramebuffer(m_display_load_render_pass); - if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_vram_readback_framebuffer == VK_NULL_HANDLE || - m_display_framebuffer == VK_NULL_HANDLE) - { - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_update_depth_framebuffer, - "VRAM Update Depth Framebuffer"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_readback_framebuffer, "VRAM Readback Framebuffer"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_display_framebuffer, "Display Framebuffer"); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::CreateFramebuffer"); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - m_vram_read_texture.TransitionToLayout(cmdbuf, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - Vulkan::DescriptorSetUpdateBuilder dsubuilder; - - m_batch_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_batch_descriptor_set_layout); - m_vram_copy_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout); - m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout); - m_display_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout); - if (m_batch_descriptor_set == VK_NULL_HANDLE || m_vram_copy_descriptor_set == VK_NULL_HANDLE || - m_vram_read_descriptor_set == VK_NULL_HANDLE || m_display_descriptor_set == VK_NULL_HANDLE) - { - return false; - } - - dsubuilder.AddBufferDescriptorWrite(m_batch_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - m_uniform_stream_buffer.GetBuffer(), 0, sizeof(BatchUBOData)); - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_batch_descriptor_set, 1, m_vram_read_texture.GetView(), - m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_copy_descriptor_set, 1, m_vram_read_texture.GetView(), - m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_read_descriptor_set, 1, m_vram_texture.GetView(), - m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_display_descriptor_set, 1, m_display_texture.GetView(), - m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - dsubuilder.Update(g_vulkan_context->GetDevice()); - - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - { - const u32 levels = GetAdaptiveDownsamplingMipLevels(); - - if (!m_downsample_texture.Create(texture_width, texture_height, levels, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT) || - !m_downsample_weight_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, VK_FORMAT_R8_UNORM, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT)) - { - return false; - } - - m_downsample_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - m_downsample_render_pass = g_vulkan_context->GetRenderPass(m_downsample_texture.GetVkFormat(), VK_FORMAT_UNDEFINED, - VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); - m_downsample_weight_render_pass = - g_vulkan_context->GetRenderPass(m_downsample_weight_texture.GetVkFormat(), VK_FORMAT_UNDEFINED, - VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); - if (m_downsample_render_pass == VK_NULL_HANDLE || m_downsample_weight_render_pass == VK_NULL_HANDLE) - return false; - - m_downsample_weight_framebuffer = m_downsample_weight_texture.CreateFramebuffer(m_downsample_weight_render_pass); - if (m_downsample_weight_framebuffer == VK_NULL_HANDLE) - return false; - - m_downsample_mip_views.resize(levels); - for (u32 i = 0; i < levels; i++) - { - SmoothMipView& mv = m_downsample_mip_views[i]; - - const VkImageViewCreateInfo vci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - m_downsample_texture.GetImage(), - VK_IMAGE_VIEW_TYPE_2D, - m_downsample_texture.GetVkFormat(), - {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}, - {VK_IMAGE_ASPECT_COLOR_BIT, i, 1u, 0u, 1u}}; - VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &vci, nullptr, &mv.image_view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView() for smooth mip failed: "); - return false; - } - - mv.descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout); - if (mv.descriptor_set == 
VK_NULL_HANDLE) - return false; - - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_downsample_mip_views[i].descriptor_set, 1, - m_downsample_mip_views[i].image_view, m_point_sampler, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - fbb.AddAttachment(mv.image_view); - fbb.SetRenderPass(m_downsample_render_pass); - fbb.SetSize(texture_width >> i, texture_height >> i, 1); - mv.framebuffer = fbb.Create(g_vulkan_context->GetDevice()); - if (mv.framebuffer == VK_NULL_HANDLE) - return false; - } - - m_downsample_composite_descriptor_set = - g_vulkan_context->AllocateGlobalDescriptorSet(m_downsample_composite_descriptor_set_layout); - if (m_downsample_composite_descriptor_set_layout == VK_NULL_HANDLE) - return false; - - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_downsample_composite_descriptor_set, 1, - m_downsample_texture.GetView(), m_trilinear_sampler, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_downsample_composite_descriptor_set, 2, - m_downsample_weight_texture.GetView(), m_linear_sampler, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - dsubuilder.Update(g_vulkan_context->GetDevice()); - } - else if (m_downsample_mode == GPUDownsampleMode::Box) - { - if (!m_downsample_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) - { - return false; - } - - m_downsample_render_pass = g_vulkan_context->GetRenderPass(m_downsample_texture.GetVkFormat(), VK_FORMAT_UNDEFINED, - VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); - - m_downsample_mip_views.resize(1); - m_downsample_mip_views[0].framebuffer = m_downsample_texture.CreateFramebuffer(m_downsample_render_pass); - if (m_downsample_mip_views[0].framebuffer == VK_NULL_HANDLE) - return false; - } - - ClearDisplay(); - SetFullVRAMDirtyRectangle(); - return true; -} - 
-void GPU_HW_Vulkan::ClearFramebuffer() -{ - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - static constexpr VkClearColorValue cc = {}; - const VkClearDepthStencilValue cds = {m_pgxp_depth_buffer ? 1.0f : 0.0f, 0u}; - static constexpr VkImageSubresourceRange csrr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; - static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u}; - vkCmdClearColorImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), &cc, 1u, &csrr); - vkCmdClearDepthStencilImage(cmdbuf, m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u, - &dsrr); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - m_last_depth_z = 1.0f; - - SetFullVRAMDirtyRectangle(); -} - -void GPU_HW_Vulkan::DestroyFramebuffer() -{ - Vulkan::Util::SafeFreeGlobalDescriptorSet(m_downsample_composite_descriptor_set); - - for (SmoothMipView& mv : m_downsample_mip_views) - { - Vulkan::Util::SafeFreeGlobalDescriptorSet(mv.descriptor_set); - Vulkan::Util::SafeDestroyImageView(mv.image_view); - Vulkan::Util::SafeDestroyFramebuffer(mv.framebuffer); - } - m_downsample_mip_views.clear(); - m_downsample_texture.Destroy(false); - Vulkan::Util::SafeDestroyFramebuffer(m_downsample_weight_framebuffer); - m_downsample_weight_texture.Destroy(false); - - Vulkan::Util::SafeFreeGlobalDescriptorSet(m_batch_descriptor_set); - Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_copy_descriptor_set); - Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_read_descriptor_set); - Vulkan::Util::SafeFreeGlobalDescriptorSet(m_display_descriptor_set); - - Vulkan::Util::SafeDestroyFramebuffer(m_vram_framebuffer); - 
Vulkan::Util::SafeDestroyFramebuffer(m_vram_update_depth_framebuffer); - Vulkan::Util::SafeDestroyFramebuffer(m_vram_readback_framebuffer); - Vulkan::Util::SafeDestroyFramebuffer(m_display_framebuffer); - - m_vram_read_texture.Destroy(false); - m_vram_depth_texture.Destroy(false); - m_vram_texture.Destroy(false); - m_vram_readback_texture.Destroy(false); - m_display_texture.Destroy(false); -} - -bool GPU_HW_Vulkan::CreateVertexBuffer() -{ - if (!m_vertex_stream_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE)) - return false; - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vertex_stream_buffer.GetBuffer(), - "Vertex Stream Buffer"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vertex_stream_buffer.GetAllocation(), - "Vertex Stream Buffer Memory"); - return true; -} - -bool GPU_HW_Vulkan::CreateUniformBuffer() -{ - if (!m_uniform_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_BUFFER_SIZE)) - return false; - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_uniform_stream_buffer.GetBuffer(), - "Uniform Stream Buffer"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_uniform_stream_buffer.GetAllocation(), - "Uniform Stream Buffer Memory"); - return true; -} - -bool GPU_HW_Vulkan::CreateTextureBuffer() -{ - if (m_use_ssbos_for_vram_writes) - { - if (!m_texture_stream_buffer.Create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) - return false; - - m_vram_write_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_vram_write_descriptor_set_layout); - if (m_vram_write_descriptor_set == VK_NULL_HANDLE) - return false; - - Vulkan::DescriptorSetUpdateBuilder dsubuilder; - dsubuilder.AddBufferDescriptorWrite(m_vram_write_descriptor_set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - m_texture_stream_buffer.GetBuffer(), 0, - m_texture_stream_buffer.GetCurrentSize()); - dsubuilder.Update(g_vulkan_context->GetDevice()); - return true; - } - else - { - 
if (!m_texture_stream_buffer.Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) - return false; - Vulkan::BufferViewBuilder bvbuilder; - bvbuilder.Set(m_texture_stream_buffer.GetBuffer(), VK_FORMAT_R16_UINT, 0, m_texture_stream_buffer.GetCurrentSize()); - m_texture_stream_buffer_view = bvbuilder.Create(g_vulkan_context->GetDevice()); - if (m_texture_stream_buffer_view == VK_NULL_HANDLE) - return false; - - m_vram_write_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_vram_write_descriptor_set_layout); - if (m_vram_write_descriptor_set == VK_NULL_HANDLE) - return false; - - Vulkan::DescriptorSetUpdateBuilder dsubuilder; - dsubuilder.AddBufferViewDescriptorWrite(m_vram_write_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - m_texture_stream_buffer_view); - dsubuilder.Update(g_vulkan_context->GetDevice()); - } - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_texture_stream_buffer.GetBuffer(), - "Texture Stream Buffer"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_texture_stream_buffer.GetAllocation(), - "Texture Stream Buffer Memory"); - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_write_descriptor_set, "VRAM Write Descriptor Set"); - - return true; -} - -bool GPU_HW_Vulkan::CompilePipelines() -{ - VkDevice device = g_vulkan_context->GetDevice(); - VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); - - GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); - - ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + - (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); - - // vertex shaders - [textured] - // fragment shaders - 
[render_mode][texture_mode][dithering][interlacing] - DimensionalArray batch_vertex_shaders{}; - DimensionalArray batch_fragment_shaders{}; - ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { - batch_vertex_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule); - batch_fragment_shaders.enumerate(Vulkan::Util::SafeDestroyShaderModule); - }); - - for (u8 textured = 0; textured < 2; textured++) - { - const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); - VkShaderModule shader = g_vulkan_shader_cache->GetVertexShader(vs); - if (shader == VK_NULL_HANDLE) - return false; - - batch_vertex_shaders[textured] = shader; - progress.Increment(); - } - - for (u8 render_mode = 0; render_mode < 4; render_mode++) - { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); - - VkShaderModule shader = g_vulkan_shader_cache->GetFragmentShader(fs); - if (shader == VK_NULL_HANDLE) - return false; - - batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = shader; - progress.Increment(); - } - } - } - } - - Vulkan::GraphicsPipelineBuilder gpbuilder; - - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - for (u8 depth_test = 0; depth_test < 3; depth_test++) - { - for (u8 render_mode = 0; render_mode < 4; render_mode++) - { - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) - { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - static constexpr std::array depth_test_values = { - 
VK_COMPARE_OP_ALWAYS, VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_LESS_OR_EQUAL}; - const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); - - gpbuilder.SetPipelineLayout(m_batch_pipeline_layout); - gpbuilder.SetRenderPass(m_vram_render_pass, 0); - - gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX); - gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(BatchVertex, x)); - gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color)); - if (textured) - { - gpbuilder.AddVertexAttribute(2, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, u)); - gpbuilder.AddVertexAttribute(3, 0, VK_FORMAT_R32_UINT, offsetof(BatchVertex, texpage)); - if (m_using_uv_limits) - gpbuilder.AddVertexAttribute(4, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, uv_limits)); - } - - gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - gpbuilder.SetVertexShader(batch_vertex_shaders[BoolToUInt8(textured)]); - gpbuilder.SetFragmentShader(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing]); - - gpbuilder.SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); - gpbuilder.SetDepthState(true, true, depth_test_values[depth_test]); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetMultisamples(m_multisamples, m_per_sample_shading); - - if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - m_texture_filtering != GPUTextureFilter::Nearest) - { - if (m_supports_dual_source_blend) - { - gpbuilder.SetBlendAttachment( - 0, true, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC1_ALPHA, - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != 
BatchRenderMode::OnlyOpaque) ? - VK_BLEND_OP_REVERSE_SUBTRACT : - VK_BLEND_OP_ADD, - VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD); - } - else - { - const float factor = (static_cast(transparency_mode) == - GPUTransparencyMode::HalfBackgroundPlusHalfForeground) ? - 0.5f : - 1.0f; - gpbuilder.SetBlendAttachment( - 0, true, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_CONSTANT_ALPHA, - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? - VK_BLEND_OP_REVERSE_SUBTRACT : - VK_BLEND_OP_ADD, - VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD); - gpbuilder.SetBlendConstants(0.0f, 0.0f, 0.0f, factor); - } - } - - gpbuilder.SetDynamicViewportAndScissorState(); - - VkPipeline pipeline = gpbuilder.Create(device, pipeline_cache); - if (pipeline == VK_NULL_HANDLE) - return false; - - m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] = - pipeline; - progress.Increment(); - } - } - } - } - } - } - - batch_shader_guard.Run(); - - VkShaderModule fullscreen_quad_vertex_shader = - g_vulkan_shader_cache->GetVertexShader(shadergen.GenerateScreenQuadVertexShader()); - if (fullscreen_quad_vertex_shader == VK_NULL_HANDLE) - return false; - VkShaderModule uv_quad_vertex_shader = g_vulkan_shader_cache->GetVertexShader(shadergen.GenerateUVQuadVertexShader()); - if (uv_quad_vertex_shader == VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), uv_quad_vertex_shader, nullptr); - return false; - } - - progress.Increment(); - - ScopedGuard fullscreen_quad_vertex_shader_guard([&fullscreen_quad_vertex_shader, &uv_quad_vertex_shader]() { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fullscreen_quad_vertex_shader, nullptr); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), uv_quad_vertex_shader, nullptr); - }); - - // common state - 
gpbuilder.SetRenderPass(m_vram_render_pass, 0); - gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); - gpbuilder.SetMultisamples(m_multisamples, false); - - // VRAM fill - for (u8 wrapped = 0; wrapped < 2; wrapped++) - { - for (u8 interlaced = 0; interlaced < 2; interlaced++) - { - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader( - shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced))); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetPipelineLayout(m_no_samplers_pipeline_layout); - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); - - m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(device, fs, nullptr); - if (m_vram_fill_pipelines[wrapped][interlaced] == VK_NULL_HANDLE) - return false; - - progress.Increment(); - } - } - - // VRAM copy - { - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMCopyFragmentShader()); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); - gpbuilder.SetFragmentShader(fs); - for (u8 depth_test = 0; depth_test < 2; depth_test++) - { - gpbuilder.SetDepthState((depth_test != 0), true, - (depth_test != 0) ? 
VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); - - m_vram_copy_pipelines[depth_test] = gpbuilder.Create(device, pipeline_cache, false); - if (m_vram_copy_pipelines[depth_test] == VK_NULL_HANDLE) - { - vkDestroyShaderModule(device, fs, nullptr); - return false; - } - - progress.Increment(); - } - - vkDestroyShaderModule(device, fs, nullptr); - } - - // VRAM write - { - VkShaderModule fs = - g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbos_for_vram_writes)); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetPipelineLayout(m_vram_write_pipeline_layout); - gpbuilder.SetFragmentShader(fs); - for (u8 depth_test = 0; depth_test < 2; depth_test++) - { - gpbuilder.SetDepthState(true, true, (depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); - m_vram_write_pipelines[depth_test] = gpbuilder.Create(device, pipeline_cache, false); - if (m_vram_write_pipelines[depth_test] == VK_NULL_HANDLE) - { - vkDestroyShaderModule(device, fs, nullptr); - return false; - } - - progress.Increment(); - } - - vkDestroyShaderModule(device, fs, nullptr); - } - - // VRAM update depth - { - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMUpdateDepthFragmentShader()); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetRenderPass(m_vram_update_depth_render_pass, 0); - gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); - gpbuilder.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, - VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, 0); - - m_vram_update_depth_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(device, fs, nullptr); - if (m_vram_update_depth_pipeline == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), 
m_vram_update_depth_pipeline, - "VRAM Update Depth Pipeline"); - - progress.Increment(); - } - - gpbuilder.Clear(); - - // VRAM read - { - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMReadFragmentShader()); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetRenderPass(m_vram_readback_render_pass, 0); - gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - - m_vram_readback_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(device, fs, nullptr); - if (m_vram_readback_pipeline == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_readback_pipeline, "VRAM Read Pipeline"); - - progress.Increment(); - } - - gpbuilder.Clear(); - - // Display - { - gpbuilder.SetRenderPass(m_display_load_render_pass, 0); - gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - - for (u8 depth_24 = 0; depth_24 < 2; depth_24++) - { - for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) - { - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode), m_chroma_smoothing)); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(fs); - - m_display_pipelines[depth_24][interlace_mode] = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(device, fs, nullptr); - if (m_display_pipelines[depth_24][interlace_mode] == 
VK_NULL_HANDLE) - return false; - - progress.Increment(); - } - } - } - - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - { - gpbuilder.Clear(); - gpbuilder.SetRenderPass(m_downsample_render_pass, 0); - gpbuilder.SetPipelineLayout(m_downsample_pipeline_layout); - gpbuilder.SetVertexShader(uv_quad_vertex_shader); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - - VkShaderModule fs = - g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(fs); - m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline, - "Downsample First Pass Pipeline"); - - fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(fs); - m_downsample_mid_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_mid_pass_pipeline == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_mid_pass_pipeline, - "Downsample Mid Pass Pipeline"); - - fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetRenderPass(m_downsample_weight_render_pass, 0); - m_downsample_blur_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, 
nullptr); - if (m_downsample_blur_pass_pipeline == VK_NULL_HANDLE) - return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_blur_pass_pipeline, - "Downsample Blur Pass Pipeline"); - - fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetPipelineLayout(m_downsample_composite_pipeline_layout); - gpbuilder.SetRenderPass(m_display_load_render_pass, 0); - m_downsample_composite_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_composite_pass_pipeline == VK_NULL_HANDLE) - return false; - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pass_pipeline, - "Downsample Composite Pass Pipeline"); - } - else if (m_downsample_mode == GPUDownsampleMode::Box) - { - gpbuilder.Clear(); - gpbuilder.SetRenderPass(m_downsample_render_pass, 0); - gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); - gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateBoxSampleDownsampleFragmentShader()); - if (fs == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(fs); - m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE) - return false; - - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline, - "Downsample First Pass Pipeline"); - } - - progress.Increment(); - -#undef UPDATE_PROGRESS - - return true; -} - -void 
GPU_HW_Vulkan::DestroyPipelines() -{ - m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); - - m_vram_fill_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); - - for (VkPipeline& p : m_vram_write_pipelines) - Vulkan::Util::SafeDestroyPipeline(p); - - for (VkPipeline& p : m_vram_copy_pipelines) - Vulkan::Util::SafeDestroyPipeline(p); - - Vulkan::Util::SafeDestroyPipeline(m_vram_readback_pipeline); - Vulkan::Util::SafeDestroyPipeline(m_vram_update_depth_pipeline); - - Vulkan::Util::SafeDestroyPipeline(m_downsample_first_pass_pipeline); - Vulkan::Util::SafeDestroyPipeline(m_downsample_mid_pass_pipeline); - Vulkan::Util::SafeDestroyPipeline(m_downsample_blur_pass_pipeline); - Vulkan::Util::SafeDestroyPipeline(m_downsample_composite_pass_pipeline); - - m_display_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); -} - -void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) -{ - BeginVRAMRenderPass(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::DrawBatchVertices: [%u,%u)", base_vertex, - base_vertex + num_vertices); - - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - const u8 depth_test = m_batch.use_depth_buffer ? 
static_cast(2) : BoolToUInt8(m_batch.check_mask_before_draw); - VkPipeline pipeline = - m_batch_pipelines[depth_test][static_cast(render_mode)][static_cast(m_batch.texture_mode)][static_cast( - m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; - - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - vkCmdDraw(cmdbuf, num_vertices, 1, base_vertex, 0); -} - -void GPU_HW_Vulkan::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "GPU_HW_Vulkan::SetScissorFromDrawingArea: {%u,%u} {%u,%u}", left, top, - right, bottom); - Vulkan::Util::SetScissor(g_vulkan_context->GetCurrentCommandBuffer(), left, top, right - left, bottom - top); -} - -void GPU_HW_Vulkan::ClearDisplay() -{ - GPU_HW::ClearDisplay(); - EndRenderPass(); - - g_host_display->ClearDisplayTexture(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::ClearDisplay"); - m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - static const VkClearColorValue cc = {{0.0f, 0.0f, 0.0f, 1.0f}}; - static const VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - vkCmdClearColorImage(cmdbuf, m_display_texture.GetImage(), m_display_texture.GetLayout(), &cc, 1, &srr); -} - -void GPU_HW_Vulkan::UpdateDisplay() -{ - GPU_HW::UpdateDisplay(); - EndRenderPass(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::UpdateDisplay"); - - if (g_settings.debugging.show_vram) - { - if (IsUsingMultisampling()) - { - if (m_vram_dirty_rect.Intersects( - Common::Rectangle::FromExtents(m_crtc_state.display_vram_left, m_crtc_state.display_vram_top, - m_crtc_state.display_vram_width, 
m_crtc_state.display_vram_height))) - { - UpdateVRAMReadTexture(); - } - - g_host_display->SetDisplayTexture(&m_vram_read_texture, 0, 0, m_vram_read_texture.GetWidth(), - m_vram_read_texture.GetHeight()); - } - else - { - g_host_display->SetDisplayTexture(&m_vram_texture, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - } - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); - } - else - { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - GetDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) - { - g_host_display->ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) - { - if (IsUsingDownsampling()) - { - DownsampleFramebuffer(m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); - } - else - { 
- g_host_display->SetDisplayTexture(&m_vram_texture, scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); - } - } - else - { - EndRenderPass(); - - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - - m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - Assert(scaled_display_width <= m_display_texture.GetWidth() && - scaled_display_height <= m_display_texture.GetHeight()); - - BeginRenderPass((interlaced != InterlacedRenderMode::None) ? 
m_display_load_render_pass : - m_display_discard_render_pass, - m_display_framebuffer, 0, 0, scaled_display_width, scaled_display_height); - - vkCmdBindPipeline( - cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)]); - vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), - uniforms); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, - &m_vram_read_descriptor_set, 0, nullptr); - Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, scaled_display_width, scaled_display_height); - - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - - EndRenderPass(); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - if (IsUsingDownsampling()) - { - DownsampleFramebuffer(m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - } - else - { - g_host_display->SetDisplayTexture(&m_display_texture, 0, 0, scaled_display_width, scaled_display_height); - RestoreGraphicsAPIState(); - } - } - } -} - -void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - { - ReadSoftwareRendererVRAM(x, y, width, height); - return; - } - - // Get bounds with wrap-around handled. 
- const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); - - EndRenderPass(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::ReadVRAM: %u %u %ux%u", x, y, width, height); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use - // the actual size we're rendering to... - const u32 rp_width = std::max(16, encoded_width); - const u32 rp_height = std::max(16, encoded_height); - BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, rp_width, rp_height); - - // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline); - vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), - uniforms); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, - &m_vram_read_descriptor_set, 0, nullptr); - Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - EndRenderPass(); - - m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Stage the readback and copy it into our shadow buffer (will execute command buffer and stall). 
- g_host_display->DownloadTexture(&m_vram_readback_texture, 0, 0, encoded_width, encoded_height, - &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left], - VRAM_WIDTH * sizeof(u16)); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - if (IsUsingSoftwareRendererForReadbacks()) - FillSoftwareRendererVRAM(x, y, width, height, color); - - GPU_HW::FillVRAM(x, y, width, height, color); - - BeginVRAMRenderPass(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::FillVRAM: {%u,%u} %ux%u %08x", x, y, width, height, - color); - - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); - vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), - &uniforms); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] - [BoolToUInt8(IsInterlacedRenderingEnabled())]); - - const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); - Vulkan::Util::SetViewportAndScissor(cmdbuf, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, - bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) -{ - if (IsUsingSoftwareRendererForReadbacks()) - UpdateSoftwareRendererVRAM(x, y, width, height, data, set_mask, check_mask); - - const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, set_mask, check_mask); - - if (!check_mask) - { - const TextureReplacementTexture* rtex = g_texture_replacements.GetVRAMWriteReplacement(width, 
height, data); - if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, - width * m_resolution_scale, height * m_resolution_scale)) - { - return; - } - } - - const u32 data_size = width * height * sizeof(u16); - const u32 alignment = std::max(sizeof(u32), static_cast(m_use_ssbos_for_vram_writes ? - g_vulkan_context->GetStorageBufferAlignment() : - g_vulkan_context->GetTexelBufferAlignment())); - if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) - { - Log_PerfPrintf("Executing command buffer while waiting for %u bytes in stream buffer", data_size); - ExecuteCommandBuffer(false, true); - if (!m_texture_stream_buffer.ReserveMemory(data_size, alignment)) - { - Panic("Failed to allocate space in stream buffer for VRAM write"); - return; - } - } - - const u32 start_index = m_texture_stream_buffer.GetCurrentOffset() / sizeof(u16); - std::memcpy(m_texture_stream_buffer.GetCurrentHostPointer(), data, data_size); - m_texture_stream_buffer.CommitMemory(data_size); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::UpdateVRAM: {%u,%u} %ux%u", x, y, width, height); - - BeginVRAMRenderPass(); - - const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, set_mask, check_mask); - vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), - &uniforms); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)]); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1, - &m_vram_write_descriptor_set, 0, nullptr); - - // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; - Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, 
scaled_bounds.GetWidth(), - scaled_bounds.GetHeight()); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::CopyVRAM: {%u, %u} {%u, %u} %ux%u", src_x, src_y, - dst_x, dst_y, width, height); - if (IsUsingSoftwareRendererForReadbacks()) - CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); - - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) - { - const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); - if (m_vram_dirty_rect.Intersects(src_bounds)) - UpdateVRAMReadTexture(); - IncludeVRAMDirtyRectangle(dst_bounds); - - const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height)); - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); - - BeginVRAMRenderPass(); - - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)]); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, - &m_vram_copy_descriptor_set, 0, nullptr); - vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), - &uniforms); - Vulkan::Util::SetViewportAndScissor(cmdbuf, dst_bounds_scaled.left, dst_bounds_scaled.top, - dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight()); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - RestoreGraphicsAPIState(); - - if (m_GPUSTAT.check_mask_before_draw) - m_current_depth++; - - return; - } - - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); - - src_x *= m_resolution_scale; 
- src_y *= m_resolution_scale; - dst_x *= m_resolution_scale; - dst_y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - - EndRenderPass(); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_GENERAL); - - const VkImageCopy ic{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(src_x), static_cast(src_y), 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(dst_x), static_cast(dst_y), 0}, - {width, height, 1u}}; - vkCmdCopyImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_texture.GetImage(), - m_vram_texture.GetLayout(), 1, &ic); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); -} - -void GPU_HW_Vulkan::UpdateVRAMReadTexture() -{ - EndRenderPass(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::UpdateVRAMReadTexture"); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; - - if (m_vram_texture.GetSamples() > VK_SAMPLE_COUNT_1_BIT) - { - const VkImageResolve resolve{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, - {scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1u}}; - vkCmdResolveImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_read_texture.GetImage(), - m_vram_read_texture.GetLayout(), 1, &resolve); - } - else - { - const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, - 
{scaled_rect.GetWidth(), scaled_rect.GetHeight(), 1u}}; - - vkCmdCopyImage(cmdbuf, m_vram_texture.GetImage(), m_vram_texture.GetLayout(), m_vram_read_texture.GetImage(), - m_vram_read_texture.GetLayout(), 1u, ©); - } - - m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - GPU_HW::UpdateVRAMReadTexture(); -} - -void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit() -{ - if (m_pgxp_depth_buffer) - return; - - EndRenderPass(); - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit"); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - BeginRenderPass(m_vram_update_depth_render_pass, m_vram_update_depth_framebuffer, 0, 0, m_vram_texture.GetWidth(), - m_vram_texture.GetHeight()); - - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, - &m_vram_read_descriptor_set, 0, nullptr); - Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - - EndRenderPass(); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_Vulkan::ClearDepthBuffer() -{ - EndRenderPass(); - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::ClearDepthBuffer"); - m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - static const VkClearDepthStencilValue cds = {1.0f}; - static constexpr VkImageSubresourceRange dsrr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u}; - vkCmdClearDepthStencilImage(cmdbuf, 
m_vram_depth_texture.GetImage(), m_vram_depth_texture.GetLayout(), &cds, 1u, - &dsrr); - - m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - m_last_depth_z = 1.0f; -} - -bool GPU_HW_Vulkan::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height) -{ - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::BlitVRAMReplacementTexture: {%u,%u} %ux%u", dst_x, - dst_y, width, height); - if (m_vram_write_replacement_texture.GetWidth() < tex->GetWidth() || - m_vram_write_replacement_texture.GetHeight() < tex->GetHeight()) - { - if (!m_vram_write_replacement_texture.Create(tex->GetWidth(), tex->GetHeight(), 1, 1, VK_FORMAT_R8G8B8A8_UNORM, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_DST_BIT)) - { - Log_ErrorPrint("Failed to create VRAM write replacement texture"); - return false; - } - } - - m_vram_write_replacement_texture.Update(0, 0, tex->GetWidth(), tex->GetHeight(), 0, 0, tex->GetPixels(), - tex->GetPitch()); - - // texture -> vram - const VkImageBlit blit = { - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - { - {0, 0, 0}, - {static_cast(tex->GetWidth()), static_cast(tex->GetHeight()), 1}, - }, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {{static_cast(dst_x), static_cast(dst_y), 0}, - {static_cast(dst_x + width), static_cast(dst_y + height), 1}}, - }; - m_vram_write_replacement_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdBlitImage(cmdbuf, m_vram_write_replacement_texture.GetImage(), m_vram_write_replacement_texture.GetLayout(), - m_vram_texture.GetImage(), m_vram_texture.GetLayout(), 1, &blit, VK_FILTER_LINEAR); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - return true; -} 
- -void GPU_HW_Vulkan::DownsampleFramebuffer(Vulkan::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - DownsampleFramebufferAdaptive(source, left, top, width, height); - else - DownsampleFramebufferBoxFilter(source, left, top, width, height); -} - -void GPU_HW_Vulkan::DownsampleFramebufferBoxFilter(Vulkan::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::DownsampleFramebufferBoxFilter: {%u,%u} %ux%u", - left, top, width, height); - source.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_downsample_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - Assert(&source == &m_vram_texture || &source == &m_display_texture); - VkDescriptorSet ds = (&source == &m_vram_texture) ? m_vram_read_descriptor_set : m_display_descriptor_set; - - const u32 ds_left = left / m_resolution_scale; - const u32 ds_top = top / m_resolution_scale; - const u32 ds_width = width / m_resolution_scale; - const u32 ds_height = height / m_resolution_scale; - - static constexpr VkClearValue clear_color = {}; - BeginRenderPass(m_downsample_render_pass, m_downsample_mip_views[0].framebuffer, ds_left, ds_top, ds_width, ds_height, - &clear_color); - Vulkan::Util::SetViewportAndScissor(cmdbuf, ds_left, ds_top, ds_width, ds_height); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_downsample_first_pass_pipeline); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, &ds, 0, - nullptr); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - EndRenderPass(); - - m_downsample_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(&m_downsample_texture, ds_left, ds_top, ds_width, ds_height); -} - -void 
GPU_HW_Vulkan::DownsampleFramebufferAdaptive(Vulkan::Texture& source, u32 left, u32 top, u32 width, u32 height) -{ - const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(left), static_cast(top), 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, - {static_cast(left), static_cast(top), 0}, - {width, height, 1u}}; - - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope outer_scope(cmdbuf, "Downsample Framebuffer Adaptive:"); - - source.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_downsample_texture.TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdCopyImage(cmdbuf, source.GetImage(), source.GetLayout(), m_downsample_texture.GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©); - - m_downsample_texture.TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - // creating mip chain - const u32 levels = m_downsample_texture.GetLevels(); - for (u32 level = 1; level < levels; level++) - { - const Vulkan::Util::DebugScope mip_scope(cmdbuf, "Generate Mip: %u", level); - m_downsample_texture.TransitionSubresourcesToLayout( - cmdbuf, level, 1, 0, 1, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - static constexpr VkClearValue clear_color = {}; - BeginRenderPass(m_downsample_render_pass, m_downsample_mip_views[level].framebuffer, 0, 0, - m_downsample_texture.GetMipWidth(level), m_downsample_texture.GetMipHeight(level), &clear_color); - Vulkan::Util::SetViewportAndScissor(cmdbuf, left >> level, top >> level, width >> level, height >> level); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - (level == 1) ? 
m_downsample_first_pass_pipeline : m_downsample_mid_pass_pipeline); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_downsample_pipeline_layout, 0, 1, - &m_downsample_mip_views[level - 1].descriptor_set, 0, nullptr); - - const SmoothingUBOData ubo = GetSmoothingUBO(level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - vkCmdPushConstants(cmdbuf, m_downsample_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, - 0, sizeof(ubo), &ubo); - - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - - EndRenderPass(); - - m_downsample_texture.TransitionSubresourcesToLayout( - cmdbuf, level, 1, 0, 1, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - // blur pass at lowest resolution - { - const Vulkan::Util::DebugScope blur_scope(cmdbuf, "Blur Pass at lowest resolution"); - const u32 last_level = levels - 1; - - m_downsample_weight_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - static constexpr VkClearValue clear_color = {}; - BeginRenderPass(m_downsample_weight_render_pass, m_downsample_weight_framebuffer, 0, 0, - m_downsample_texture.GetMipWidth(last_level), m_downsample_texture.GetMipHeight(last_level), - &clear_color); - Vulkan::Util::SetViewportAndScissor(cmdbuf, left >> last_level, top >> last_level, width >> last_level, - height >> last_level); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_downsample_blur_pass_pipeline); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_downsample_pipeline_layout, 0, 1, - &m_downsample_mip_views[last_level].descriptor_set, 0, nullptr); - - const SmoothingUBOData ubo = GetSmoothingUBO(last_level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - vkCmdPushConstants(cmdbuf, m_downsample_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, - 0, sizeof(ubo), &ubo); - - vkCmdDraw(cmdbuf, 
3, 1, 0, 0); - EndRenderPass(); - - m_downsample_weight_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - // resolve pass - { - const Vulkan::Util::DebugScope resolve_scope(cmdbuf, "Resolve pass"); - m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - BeginRenderPass(m_display_load_render_pass, m_display_framebuffer, left, top, width, height); - Vulkan::Util::SetViewportAndScissor(cmdbuf, left, top, width, height); - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_downsample_composite_pass_pipeline); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_downsample_composite_pipeline_layout, 0, 1, - &m_downsample_composite_descriptor_set, 0, nullptr); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - EndRenderPass(); - m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(&m_display_texture, left, top, width, height); -} - -std::unique_ptr GPU::CreateHardwareVulkanRenderer() -{ - if (!Host::AcquireHostDisplay(RenderAPI::Vulkan)) - { - Log_ErrorPrintf("Host render API is incompatible"); - return nullptr; - } - - Assert(g_vulkan_shader_cache); - std::unique_ptr gpu(std::make_unique()); - if (!gpu->Initialize()) - return nullptr; - - return gpu; -} diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h deleted file mode 100644 index fbb8200c4..000000000 --- a/src/core/gpu_hw_vulkan.h +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/dimensional_array.h" -#include "common/vulkan/stream_buffer.h" -#include "common/vulkan/texture.h" -#include "gpu_hw.h" -#include "texture_replacements.h" -#include -#include -#include - -class GPU_HW_Vulkan final : public GPU_HW -{ -public: - GPU_HW_Vulkan(); - ~GPU_HW_Vulkan() override; - - GPURenderer 
GetRendererType() const override; - - bool Initialize() override; - void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; - - void ResetGraphicsAPIState() override; - void RestoreGraphicsAPIState() override; - void UpdateSettings() override; - -protected: - void ClearDisplay() override; - void UpdateDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateVRAMReadTexture() override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; - -private: - enum : u32 - { - MAX_PUSH_CONSTANTS_SIZE = 64, - }; - void SetCapabilities(); - void DestroyResources(); - - ALWAYS_INLINE bool InRenderPass() const { return (m_current_render_pass != VK_NULL_HANDLE); } - void BeginRenderPass(VkRenderPass render_pass, VkFramebuffer framebuffer, u32 x, u32 y, u32 width, u32 height, - const VkClearValue* clear_value = nullptr); - void BeginVRAMRenderPass(); - void EndRenderPass(); - void ExecuteCommandBuffer(bool wait_for_completion, bool restore_state); - - bool CreatePipelineLayouts(); - bool CreateSamplers(); - - bool CreateFramebuffer(); - void ClearFramebuffer(); - void DestroyFramebuffer(); - - bool CreateVertexBuffer(); - bool CreateUniformBuffer(); - bool CreateTextureBuffer(); - - bool CompilePipelines(); - void DestroyPipelines(); 
- - bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); - - void DownsampleFramebuffer(Vulkan::Texture& source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferBoxFilter(Vulkan::Texture& source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferAdaptive(Vulkan::Texture& source, u32 left, u32 top, u32 width, u32 height); - - VkRenderPass m_current_render_pass = VK_NULL_HANDLE; - - VkRenderPass m_vram_render_pass = VK_NULL_HANDLE; - VkRenderPass m_vram_update_depth_render_pass = VK_NULL_HANDLE; - VkRenderPass m_display_load_render_pass = VK_NULL_HANDLE; - VkRenderPass m_display_discard_render_pass = VK_NULL_HANDLE; - VkRenderPass m_vram_readback_render_pass = VK_NULL_HANDLE; - - VkDescriptorSetLayout m_batch_descriptor_set_layout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_single_sampler_descriptor_set_layout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_vram_write_descriptor_set_layout = VK_NULL_HANDLE; - - VkPipelineLayout m_batch_pipeline_layout = VK_NULL_HANDLE; - VkPipelineLayout m_no_samplers_pipeline_layout = VK_NULL_HANDLE; - VkPipelineLayout m_single_sampler_pipeline_layout = VK_NULL_HANDLE; - VkPipelineLayout m_vram_write_pipeline_layout = VK_NULL_HANDLE; - - Vulkan::Texture m_vram_texture; - Vulkan::Texture m_vram_depth_texture; - Vulkan::Texture m_vram_read_texture; - Vulkan::Texture m_vram_readback_texture; - Vulkan::Texture m_display_texture; - bool m_use_ssbos_for_vram_writes = false; - - VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_vram_readback_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_display_framebuffer = VK_NULL_HANDLE; - - VkSampler m_point_sampler = VK_NULL_HANDLE; - VkSampler m_linear_sampler = VK_NULL_HANDLE; - VkSampler m_trilinear_sampler = VK_NULL_HANDLE; - - VkDescriptorSet m_batch_descriptor_set = VK_NULL_HANDLE; - VkDescriptorSet 
m_vram_copy_descriptor_set = VK_NULL_HANDLE; - VkDescriptorSet m_vram_read_descriptor_set = VK_NULL_HANDLE; - VkDescriptorSet m_vram_write_descriptor_set = VK_NULL_HANDLE; - VkDescriptorSet m_display_descriptor_set = VK_NULL_HANDLE; - - Vulkan::StreamBuffer m_vertex_stream_buffer; - Vulkan::StreamBuffer m_uniform_stream_buffer; - Vulkan::StreamBuffer m_texture_stream_buffer; - - u32 m_current_uniform_buffer_offset = 0; - VkBufferView m_texture_stream_buffer_view = VK_NULL_HANDLE; - - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - DimensionalArray m_batch_pipelines{}; - - // [wrapped][interlaced] - DimensionalArray m_vram_fill_pipelines{}; - - // [depth_test] - std::array m_vram_write_pipelines{}; - std::array m_vram_copy_pipelines{}; - - VkPipeline m_vram_readback_pipeline = VK_NULL_HANDLE; - VkPipeline m_vram_update_depth_pipeline = VK_NULL_HANDLE; - - // [depth_24][interlace_mode] - DimensionalArray m_display_pipelines{}; - - // texture replacements - Vulkan::Texture m_vram_write_replacement_texture; - - // downsampling - Vulkan::Texture m_downsample_texture; - VkRenderPass m_downsample_render_pass = VK_NULL_HANDLE; - Vulkan::Texture m_downsample_weight_texture; - VkRenderPass m_downsample_weight_render_pass = VK_NULL_HANDLE; - VkFramebuffer m_downsample_weight_framebuffer = VK_NULL_HANDLE; - - struct SmoothMipView - { - VkImageView image_view = VK_NULL_HANDLE; - VkDescriptorSet descriptor_set = VK_NULL_HANDLE; - VkFramebuffer framebuffer = VK_NULL_HANDLE; - }; - std::vector m_downsample_mip_views; - - VkPipelineLayout m_downsample_pipeline_layout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_downsample_composite_descriptor_set_layout = VK_NULL_HANDLE; - VkPipelineLayout m_downsample_composite_pipeline_layout = VK_NULL_HANDLE; - VkDescriptorSet m_downsample_composite_descriptor_set = VK_NULL_HANDLE; - VkPipeline m_downsample_first_pass_pipeline = VK_NULL_HANDLE; - VkPipeline m_downsample_mid_pass_pipeline = VK_NULL_HANDLE; - 
VkPipeline m_downsample_blur_pass_pipeline = VK_NULL_HANDLE; - VkPipeline m_downsample_composite_pass_pipeline = VK_NULL_HANDLE; -}; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 95e780cec..9fcbfeafd 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -2,14 +2,18 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_sw.h" +#include "system.h" + +#include "util/gpu_device.h" + #include "common/align.h" #include "common/assert.h" #include "common/log.h" #include "common/make_array.h" #include "common/platform.h" -#include "util/host_display.h" -#include "system.h" + #include + Log_SetChannel(GPU_SW); #if defined(CPU_X64) @@ -39,12 +43,7 @@ GPU_SW::GPU_SW() GPU_SW::~GPU_SW() { m_backend.Shutdown(); - g_host_display->ClearDisplayTexture(); -} - -GPURenderer GPU_SW::GetRendererType() const -{ - return GPURenderer::Software; + g_gpu_device->ClearDisplayTexture(); } const Threading::Thread* GPU_SW::GetSWThread() const @@ -52,6 +51,11 @@ const Threading::Thread* GPU_SW::GetSWThread() const return m_backend.GetThread(); } +bool GPU_SW::IsHardwareRenderer() const +{ + return false; +} + bool GPU_SW::Initialize() { if (!GPU::Initialize() || !m_backend.Initialize(false)) @@ -63,7 +67,7 @@ bool GPU_SW::Initialize() GPUTexture::Format::RGB565, GPUTexture::Format::RGBA5551); for (const GPUTexture::Format format : formats_for_16bit) { - if (g_host_display->SupportsTextureFormat(format)) + if (g_gpu_device->SupportsTextureFormat(format)) { m_16bit_display_format = format; break; @@ -71,7 +75,7 @@ bool GPU_SW::Initialize() } for (const GPUTexture::Format format : formats_for_24bit) { - if (g_host_display->SupportsTextureFormat(format)) + if (g_gpu_device->SupportsTextureFormat(format)) { m_24bit_display_format = format; break; @@ -105,9 +109,10 @@ GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format if (!m_display_texture || m_display_texture->GetWidth() != width || m_display_texture->GetHeight() != height || 
m_display_texture->GetFormat() != format) { - g_host_display->ClearDisplayTexture(); + g_gpu_device->ClearDisplayTexture(); m_display_texture.reset(); - m_display_texture = g_host_display->CreateTexture(width, height, 1, 1, 1, format, nullptr, 0, true); + m_display_texture = + g_gpu_device->CreateTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, format, nullptr, 0, true); if (!m_display_texture) Log_ErrorPrintf("Failed to create %ux%u %u texture", width, height, static_cast(format)); } @@ -264,7 +269,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field if (!interlaced) { - if (!g_host_display->BeginTextureUpdate(texture, width, height, reinterpret_cast(&dst_ptr), &dst_stride)) + if (!texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height)) return; } else @@ -312,11 +317,11 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field } if (!interlaced) - g_host_display->EndTextureUpdate(texture, 0, 0, width, height); + texture->Unmap(); else - g_host_display->UpdateTexture(texture, 0, 0, width, height, m_display_texture_buffer.data(), output_stride); + texture->Update(0, 0, width, height, m_display_texture_buffer.data(), output_stride); - g_host_display->SetDisplayTexture(texture, 0, 0, width, height); + g_gpu_device->SetDisplayTexture(texture, 0, 0, width, height); } void GPU_SW::CopyOut15Bit(GPUTexture::Format display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field, @@ -358,7 +363,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh if (!interlaced) { - if (!g_host_display->BeginTextureUpdate(texture, width, height, reinterpret_cast(&dst_ptr), &dst_stride)) + if (!texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height)) return; } else @@ -470,11 +475,11 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh } if (!interlaced) - g_host_display->EndTextureUpdate(texture, 0, 0, width, 
height); + texture->Unmap(); else - g_host_display->UpdateTexture(texture, 0, 0, width, height, m_display_texture_buffer.data(), output_stride); + texture->Update(0, 0, width, height, m_display_texture_buffer.data(), output_stride); - g_host_display->SetDisplayTexture(texture, 0, 0, width, height); + g_gpu_device->SetDisplayTexture(texture, 0, 0, width, height); } void GPU_SW::CopyOut24Bit(GPUTexture::Format display_format, u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, @@ -511,14 +516,14 @@ void GPU_SW::UpdateDisplay() if (!g_settings.debugging.show_vram) { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, + g_gpu_device->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, GetDisplayAspectRatio()); if (IsDisplayDisabled()) { - g_host_display->ClearDisplayTexture(); + g_gpu_device->ClearDisplayTexture(); return; } @@ -559,7 +564,7 @@ void GPU_SW::UpdateDisplay() else { CopyOut15Bit(m_16bit_display_format, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false, false); - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + g_gpu_device->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } } @@ -894,10 +899,6 @@ void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 std::unique_ptr GPU::CreateSoftwareRenderer() { - // we need something to draw in.. 
but keep the current api if we have one - if (!g_host_display && !Host::AcquireHostDisplay(HostDisplay::GetPreferredAPI())) - return nullptr; - std::unique_ptr gpu(std::make_unique()); if (!gpu->Initialize()) return nullptr; diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index d3ddf6b61..1aee9509e 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -2,16 +2,18 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "common/heap_array.h" #include "gpu.h" #include "gpu_sw_backend.h" -#include "util/host_display.h" + +#include "util/gpu_device.h" + +#include "common/heap_array.h" + #include #include #include -namespace Threading -{ +namespace Threading { class Thread; } @@ -25,8 +27,8 @@ public: ALWAYS_INLINE const GPU_SW_Backend& GetBackend() const { return m_backend; } - GPURenderer GetRendererType() const override; const Threading::Thread* GetSWThread() const override; + bool IsHardwareRenderer() const override; bool Initialize() override; bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index fc4fb2549..9ab9e2b6c 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -2,11 +2,13 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_sw_backend.h" +#include "system.h" + +#include "util/gpu_device.h" + #include "common/assert.h" #include "common/log.h" -#include "gpu_sw_backend.h" -#include "util/host_display.h" -#include "system.h" + #include Log_SetChannel(GPU_SW_Backend); diff --git a/src/core/gte.cpp b/src/core/gte.cpp index 410e0e94b..db0b2658c 100644 --- a/src/core/gte.cpp +++ b/src/core/gte.cpp @@ -2,15 +2,19 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gte.h" -#include "common/assert.h" -#include "common/bitutils.h" + #include "cpu_core.h" #include "cpu_core_private.h" -#include "util/host_display.h" #include "pgxp.h" #include "settings.h" 
#include "timing_event.h" + +#include "util/gpu_device.h" #include "util/state_wrapper.h" + +#include "common/assert.h" +#include "common/bitutils.h" + #include #include #include @@ -190,14 +194,14 @@ void UpdateAspectRatio() { case DisplayAspectRatio::MatchWindow: { - if (!g_host_display) + if (!g_gpu_device) { s_aspect_ratio = DisplayAspectRatio::R4_3; return; } - num = g_host_display->GetWindowWidth(); - denom = g_host_display->GetWindowHeight(); + num = g_gpu_device->GetWindowWidth(); + denom = g_gpu_device->GetWindowHeight(); } break; diff --git a/src/core/guncon.cpp b/src/core/guncon.cpp index 2784fb927..bc61daf5f 100644 --- a/src/core/guncon.cpp +++ b/src/core/guncon.cpp @@ -2,15 +2,19 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "guncon.h" -#include "common/assert.h" -#include "common/log.h" #include "gpu.h" #include "host.h" #include "resources.h" #include "system.h" -#include "util/host_display.h" + +#include "util/gpu_device.h" #include "util/state_wrapper.h" + +#include "common/assert.h" +#include "common/log.h" + #include + Log_SetChannel(GunCon); static constexpr std::array(GunCon::Button::Count)> s_button_indices = {{13, 3, 14}}; @@ -177,8 +181,8 @@ bool GunCon::Transfer(const u8 data_in, u8* data_out) void GunCon::UpdatePosition() { // get screen coordinates - const s32 mouse_x = g_host_display->GetMousePositionX(); - const s32 mouse_y = g_host_display->GetMousePositionY(); + const s32 mouse_x = g_gpu_device->GetMousePositionX(); + const s32 mouse_y = g_gpu_device->GetMousePositionY(); // are we within the active display area? 
u32 tick, line; diff --git a/src/core/host.cpp b/src/core/host.cpp index e96f3225c..cb50f2ccd 100644 --- a/src/core/host.cpp +++ b/src/core/host.cpp @@ -2,6 +2,12 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "host.h" +#include "common_host.h" +#include "fullscreen_ui.h" +#include "imgui_overlays.h" + +#include "util/gpu_device.h" +#include "util/imgui_manager.h" #include "common/assert.h" #include "common/heterogeneous_containers.h" @@ -155,3 +161,31 @@ void Host::ReportFormattedDebuggerMessage(const char* format, ...) ReportDebuggerMessage(message); } + +void Host::RenderDisplay(bool skip_present) +{ + Host::BeginPresentFrame(); + + // acquire for IO.MousePos. + std::atomic_thread_fence(std::memory_order_acquire); + + if (!skip_present) + { + FullscreenUI::Render(); + ImGuiManager::RenderTextOverlays(); + ImGuiManager::RenderOSDMessages(); + } + + // Debug windows are always rendered, otherwise mouse input breaks on skip. + ImGuiManager::RenderOverlayWindows(); + ImGuiManager::RenderDebugWindows(); + + g_gpu_device->Render(skip_present); + + ImGuiManager::NewFrame(); +} + +void Host::InvalidateDisplay() +{ + RenderDisplay(false); +} diff --git a/src/core/host.h b/src/core/host.h index 9d6f27aa2..bf733717c 100644 --- a/src/core/host.h +++ b/src/core/host.h @@ -101,6 +101,17 @@ void OpenURL(const std::string_view& url); /// Copies the provided text to the host's clipboard, if present. bool CopyTextToClipboard(const std::string_view& text); +/// Requests shut down and exit of the hosting application. This may not actually exit, +/// if the user cancels the shutdown confirmation. +void RequestExit(bool allow_confirm); + +/// Called before drawing the OSD and other display elements. +void BeginPresentFrame(); + +/// Provided by the host; renders the display. +void RenderDisplay(bool skip_present); +void InvalidateDisplay(); + namespace Internal { /// Implementation to retrieve a translated string. 
s32 GetTranslatedStringImpl(const std::string_view& context, const std::string_view& msg, char* tbuf, diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index c95013c34..5a3542248 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -13,7 +13,7 @@ #include "system.h" #include "util/audio_stream.h" -#include "util/host_display.h" +#include "util/gpu_device.h" #include "util/imgui_fullscreen.h" #include "util/imgui_manager.h" #include "util/input_manager.h" @@ -240,6 +240,7 @@ void ImGuiManager::DrawPerformanceOverlay() if (g_settings.display_show_resolution) { + // TODO: this seems wrong? const auto [effective_width, effective_height] = g_gpu->GetEffectiveDisplayResolution(); const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); const bool pal = g_gpu->IsInPALMode(); @@ -317,7 +318,7 @@ void ImGuiManager::DrawPerformanceOverlay() #endif } - if (g_settings.display_show_gpu && g_host_display->IsGPUTimingEnabled()) + if (g_settings.display_show_gpu && g_gpu_device->IsGPUTimingEnabled()) { text.Assign("GPU: "); FormatProcessorStat(text, System::GetGPUUsage(), System::GetGPUAverageTime()); @@ -411,8 +412,9 @@ void ImGuiManager::DrawPerformanceOverlay() void ImGuiManager::DrawEnhancementsOverlay() { LargeString text; - text.AppendFmtString("{} {}", Settings::GetConsoleRegionName(System::GetRegion()), - Settings::GetRendererName(g_gpu->GetRendererType())); + text.AppendFmtString("{} {}-{}", Settings::GetConsoleRegionName(System::GetRegion()), + GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), + g_gpu->IsHardwareRenderer() ? "HW" : "SW"); if (g_settings.rewind_enable) text.AppendFormattedString(" RW=%g/%u", g_settings.rewind_save_frequency, g_settings.rewind_save_slots); @@ -722,19 +724,20 @@ void SaveStateSelectorUI::InitializeListEntry(ListEntry* li, ExtendedSaveStateIn li->preview_texture.reset(); // Might not have a display yet, we're called at startup.. 
- if (g_host_display) + if (g_gpu_device) { if (ssi && !ssi->screenshot_data.empty()) { - li->preview_texture = - g_host_display->CreateTexture(ssi->screenshot_width, ssi->screenshot_height, 1, 1, 1, GPUTexture::Format::RGBA8, - ssi->screenshot_data.data(), sizeof(u32) * ssi->screenshot_width, false); + li->preview_texture = g_gpu_device->CreateTexture( + ssi->screenshot_width, ssi->screenshot_height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, + ssi->screenshot_data.data(), sizeof(u32) * ssi->screenshot_width, false); } else { - li->preview_texture = g_host_display->CreateTexture( - Resources::PLACEHOLDER_ICON_WIDTH, Resources::PLACEHOLDER_ICON_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, - Resources::PLACEHOLDER_ICON_DATA, sizeof(u32) * Resources::PLACEHOLDER_ICON_WIDTH, false); + li->preview_texture = g_gpu_device->CreateTexture( + Resources::PLACEHOLDER_ICON_WIDTH, Resources::PLACEHOLDER_ICON_HEIGHT, 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, Resources::PLACEHOLDER_ICON_DATA, sizeof(u32) * Resources::PLACEHOLDER_ICON_WIDTH, + false); } if (!li->preview_texture) @@ -751,11 +754,12 @@ void SaveStateSelectorUI::InitializePlaceholderListEntry(ListEntry* li, std::str li->slot = slot; li->global = global; - if (g_host_display) + if (g_gpu_device) { - li->preview_texture = g_host_display->CreateTexture( - Resources::PLACEHOLDER_ICON_WIDTH, Resources::PLACEHOLDER_ICON_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, - Resources::PLACEHOLDER_ICON_DATA, sizeof(u32) * Resources::PLACEHOLDER_ICON_WIDTH, false); + li->preview_texture = g_gpu_device->CreateTexture( + Resources::PLACEHOLDER_ICON_WIDTH, Resources::PLACEHOLDER_ICON_HEIGHT, 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, Resources::PLACEHOLDER_ICON_DATA, sizeof(u32) * Resources::PLACEHOLDER_ICON_WIDTH, + false); if (!li->preview_texture) Log_ErrorPrintf("Failed to upload save state image to GPU"); } diff --git a/src/core/playstation_mouse.cpp 
b/src/core/playstation_mouse.cpp index 423d94318..e0a998459 100644 --- a/src/core/playstation_mouse.cpp +++ b/src/core/playstation_mouse.cpp @@ -2,22 +2,26 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "playstation_mouse.h" -#include "common/assert.h" -#include "common/log.h" #include "gpu.h" #include "host.h" -#include "util/host_display.h" #include "system.h" + +#include "util/gpu_device.h" #include "util/state_wrapper.h" + +#include "common/assert.h" +#include "common/log.h" + #include + Log_SetChannel(PlayStationMouse); static constexpr std::array(PlayStationMouse::Button::Count)> s_button_indices = {{11, 10}}; PlayStationMouse::PlayStationMouse(u32 index) : Controller(index) { - m_last_host_position_x = g_host_display->GetMousePositionX(); - m_last_host_position_y = g_host_display->GetMousePositionY(); + m_last_host_position_x = g_gpu_device->GetMousePositionX(); + m_last_host_position_y = g_gpu_device->GetMousePositionY(); } PlayStationMouse::~PlayStationMouse() = default; @@ -157,8 +161,8 @@ bool PlayStationMouse::Transfer(const u8 data_in, u8* data_out) void PlayStationMouse::UpdatePosition() { // get screen coordinates - const s32 mouse_x = g_host_display->GetMousePositionX(); - const s32 mouse_y = g_host_display->GetMousePositionY(); + const s32 mouse_x = g_gpu_device->GetMousePositionX(); + const s32 mouse_y = g_gpu_device->GetMousePositionY(); const s32 delta_x = mouse_x - m_last_host_position_x; const s32 delta_y = mouse_y - m_last_host_position_y; m_last_host_position_x = mouse_x; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index f6aabbab2..aa3493198 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -3,21 +3,25 @@ #include "settings.h" #include "achievements.h" +#include "controller.h" +#include "host.h" +#include "host_settings.h" +#include "system.h" + +#include "util/gpu_device.h" + #include "common/assert.h" #include "common/file_system.h" #include "common/log.h" #include "common/make_array.h" 
#include "common/path.h" #include "common/string_util.h" -#include "controller.h" -#include "host.h" -#include "host_settings.h" -#include "system.h" -#include "util/host_display.h" + #include #include #include #include + Log_SetChannel(Settings); Settings g_settings; @@ -204,6 +208,7 @@ void Settings::Load(SettingsInterface& si) gpu_resolution_scale = static_cast(si.GetIntValue("GPU", "ResolutionScale", 1)); gpu_multisamples = static_cast(si.GetIntValue("GPU", "Multisamples", 1)); gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); + gpu_disable_shader_cache = si.GetBoolValue("GPU", "DisableShaderCache", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false); @@ -440,6 +445,7 @@ void Settings::Save(SettingsInterface& si) const si.SetIntValue("GPU", "ResolutionScale", static_cast(gpu_resolution_scale)); si.SetIntValue("GPU", "Multisamples", static_cast(gpu_multisamples)); si.SetBoolValue("GPU", "UseDebugDevice", gpu_use_debug_device); + si.SetBoolValue("GPU", "DisableShaderCache", gpu_disable_shader_cache); si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); si.SetBoolValue("GPU", "UseThread", gpu_use_thread); si.SetBoolValue("GPU", "ThreadedPresentation", gpu_threaded_presentation); @@ -877,6 +883,9 @@ static constexpr auto s_gpu_renderer_names = make_array( #ifdef _WIN32 "D3D11", "D3D12", #endif +#ifdef __APPLE__ + "Metal", +#endif #ifdef WITH_VULKAN "Vulkan", #endif @@ -888,6 +897,9 @@ static constexpr auto s_gpu_renderer_display_names = make_array( #ifdef _WIN32 TRANSLATE_NOOP("GPURenderer", "Hardware (D3D11)"), TRANSLATE_NOOP("GPURenderer", "Hardware (D3D12)"), #endif +#ifdef __APPLE__ + TRANSLATE_NOOP("GPURenderer", "Hardware (Metal)"), +#endif #ifdef WITH_VULKAN TRANSLATE_NOOP("GPURenderer", "Hardware (Vulkan)"), #endif @@ -930,6 
+942,9 @@ RenderAPI Settings::GetRenderAPIForRenderer(GPURenderer renderer) case GPURenderer::HardwareD3D12: return RenderAPI::D3D12; #endif +#ifdef __APPLE__ + case GPURenderer::HardwareMetal: return RenderAPI::Metal; +#endif #ifdef WITH_VULKAN case GPURenderer::HardwareVulkan: return RenderAPI::Vulkan; @@ -940,7 +955,7 @@ RenderAPI Settings::GetRenderAPIForRenderer(GPURenderer renderer) #endif case GPURenderer::Software: default: - return HostDisplay::GetPreferredAPI(); + return GPUDevice::GetPreferredAPI(); } } @@ -1073,11 +1088,10 @@ float Settings::GetDisplayAspectRatioValue() const { case DisplayAspectRatio::MatchWindow: { - if (!g_host_display) + if (!g_gpu_device) return s_display_aspect_ratio_values[static_cast(DEFAULT_DISPLAY_ASPECT_RATIO)]; - return static_cast(g_host_display->GetWindowWidth()) / - static_cast(g_host_display->GetWindowHeight()); + return static_cast(g_gpu_device->GetWindowWidth()) / static_cast(g_gpu_device->GetWindowHeight()); } case DisplayAspectRatio::Custom: diff --git a/src/core/settings.h b/src/core/settings.h index 2d52a72b3..96317f6b1 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -100,6 +100,7 @@ struct Settings bool gpu_use_software_renderer_for_readbacks = false; bool gpu_threaded_presentation = true; bool gpu_use_debug_device = false; + bool gpu_disable_shader_cache = false; bool gpu_per_sample_shading = false; bool gpu_true_color = true; bool gpu_scaled_dithering = true; @@ -401,7 +402,9 @@ struct Settings static constexpr GPURenderer DEFAULT_GPU_RENDERER = GPURenderer::HardwareD3D12; #elif defined(_WIN32) static constexpr GPURenderer DEFAULT_GPU_RENDERER = GPURenderer::HardwareD3D11; -#elif defined(WITH_OPENGL) && (!defined(__APPLE__) || !defined(WITH_VULKAN)) +#elif defined(__APPLE__) + static constexpr GPURenderer DEFAULT_GPU_RENDERER = GPURenderer::HardwareMetal; +#elif defined(WITH_OPENGL) static constexpr GPURenderer DEFAULT_GPU_RENDERER = GPURenderer::HardwareOpenGL; #elif defined(WITH_VULKAN) static constexpr GPURenderer 
DEFAULT_GPU_RENDERER = GPURenderer::HardwareVulkan; diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 6fc020391..840e49e5d 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -2,6 +2,6 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "types.h" +#include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 7; \ No newline at end of file +static constexpr u32 SHADER_CACHE_VERSION = 9; \ No newline at end of file diff --git a/src/core/system.cpp b/src/core/system.cpp index f0e3c4330..086719f1c 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -8,13 +8,6 @@ #include "bus.h" #include "cdrom.h" #include "cheats.h" -#include "common/error.h" -#include "common/file_system.h" -#include "common/log.h" -#include "common/make_array.h" -#include "common/path.h" -#include "common/string_util.h" -#include "common/threading.h" #include "controller.h" #include "cpu_code_cache.h" #include "cpu_core.h" @@ -40,13 +33,24 @@ #include "spu.h" #include "texture_replacements.h" #include "timers.h" + #include "util/audio_stream.h" #include "util/cd_image.h" -#include "util/host_display.h" +#include "util/gpu_device.h" #include "util/ini_settings_interface.h" #include "util/iso_reader.h" #include "util/state_wrapper.h" + +#include "common/error.h" +#include "common/file_system.h" +#include "common/log.h" +#include "common/make_array.h" +#include "common/path.h" +#include "common/string_util.h" +#include "common/threading.h" + #include "xxhash.h" + #include #include #include @@ -55,6 +59,7 @@ #include #include #include + Log_SetChannel(System); #ifdef _WIN32 @@ -70,7 +75,9 @@ SystemBootParameters::SystemBootParameters(const SystemBootParameters&) = defaul SystemBootParameters::SystemBootParameters(SystemBootParameters&& other) = default; -SystemBootParameters::SystemBootParameters(std::string filename_) : filename(std::move(filename_)) {} 
+SystemBootParameters::SystemBootParameters(std::string filename_) : filename(std::move(filename_)) +{ +} SystemBootParameters::~SystemBootParameters() = default; @@ -135,6 +142,7 @@ static std::string s_input_profile_name; static System::State s_state = System::State::Shutdown; static std::atomic_bool s_startup_cancelled{false}; +static bool s_keep_gpu_device_on_shutdown = false; static ConsoleRegion s_region = ConsoleRegion::NTSC_U; TickCount System::g_ticks_per_second = System::MASTER_CLOCK; @@ -799,12 +807,10 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_display, bool if (!state_valid) Log_ErrorPrintf("Failed to save old GPU state when switching renderers"); - g_gpu->ResetGraphicsAPIState(); - // create new renderer g_gpu.reset(); if (force_recreate_display) - Host::ReleaseHostDisplay(); + Host::ReleaseGPUDevice(); if (!CreateGPU(renderer)) { @@ -822,7 +828,6 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_display, bool g_gpu->RestoreGraphicsAPIState(); g_gpu->DoState(sw, nullptr, update_display); TimingEvents::DoState(sw); - g_gpu->ResetGraphicsAPIState(); } // fix up vsync etc @@ -1062,6 +1067,7 @@ bool System::LoadState(const char* filename) ResetPerformanceCounters(); ResetThrottler(); Host::RenderDisplay(false); + g_gpu->RestoreGraphicsAPIState(); Log_VerbosePrintf("Loading state took %.2f msec", load_timer.GetTimeMilliseconds()); return true; } @@ -1135,6 +1141,7 @@ bool System::BootSystem(SystemBootParameters parameters) Assert(s_state == State::Shutdown); s_state = State::Starting; s_startup_cancelled.store(false); + s_keep_gpu_device_on_shutdown = static_cast(g_gpu_device); s_region = g_settings.region; Host::OnSystemStarting(); @@ -1437,7 +1444,11 @@ bool System::Initialize(bool force_software_renderer) if (IsStartupCancelled()) { g_gpu.reset(); - Host::ReleaseHostDisplay(); + if (!s_keep_gpu_device_on_shutdown) + { + Host::ReleaseGPUDevice(); + Host::ReleaseRenderWindow(); + } if 
(g_settings.gpu_pgxp_enable) PGXP::Shutdown(); CPU::Shutdown(); @@ -1519,8 +1530,6 @@ void System::DestroySystem() Timers::Shutdown(); Pad::Shutdown(); CDROM::Shutdown(); - if (g_gpu) - g_gpu->ResetGraphicsAPIState(); g_gpu.reset(); InterruptController::Shutdown(); DMA::Shutdown(); @@ -1532,11 +1541,15 @@ void System::DestroySystem() ClearRunningGame(); // Restore present-all-frames behavior. - if (g_host_display) + if (s_keep_gpu_device_on_shutdown && g_gpu_device) { - g_host_display->SetDisplayMaxFPS(0.0f); + g_gpu_device->SetDisplayMaxFPS(0.0f); UpdateSoftwareCursor(); - Host::ReleaseHostDisplay(); + } + else + { + Host::ReleaseGPUDevice(); + Host::ReleaseRenderWindow(); } s_bios_hash = {}; @@ -1601,8 +1614,6 @@ void System::Execute() else CPU::Execute(); - g_gpu->ResetGraphicsAPIState(); - s_system_executing = false; continue; } @@ -1624,6 +1635,9 @@ void System::FrameDone() { s_frame_number++; + // Vertex buffer is shared, need to flush what we have. + g_gpu->FlushRender(); + // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. 
SPU::GeneratePendingSamples(); @@ -1680,14 +1694,11 @@ void System::FrameDone() { s_last_frame_skipped = false; - // TODO: Purge reset/restore - g_gpu->ResetGraphicsAPIState(); - - const bool skip_present = g_host_display->ShouldSkipDisplayingFrame(); + const bool skip_present = g_gpu_device->ShouldSkipDisplayingFrame(); Host::RenderDisplay(skip_present); - if (!skip_present && g_host_display->IsGPUTimingEnabled()) + if (!skip_present && g_gpu_device->IsGPUTimingEnabled()) { - s_accumulated_gpu_time += g_host_display->GetAndResetAccumulatedGPUTime(); + s_accumulated_gpu_time += g_gpu_device->GetAndResetAccumulatedGPUTime(); s_presents_since_last_update++; } @@ -1784,10 +1795,9 @@ void System::SingleStepCPU() CPU::SingleStep(); + g_gpu->FlushRender(); SPU::GeneratePendingSamples(); - g_gpu->ResetGraphicsAPIState(); - s_system_executing = false; } @@ -1834,35 +1844,29 @@ void System::RecreateSystem() bool System::CreateGPU(GPURenderer renderer) { - switch (renderer) + const RenderAPI api = Settings::GetRenderAPIForRenderer(renderer); + + if (!g_gpu_device || (renderer != GPURenderer::Software && g_gpu_device->GetRenderAPI() != api)) { -#ifdef WITH_OPENGL - case GPURenderer::HardwareOpenGL: - g_gpu = GPU::CreateHardwareOpenGLRenderer(); - break; -#endif + if (g_gpu_device) + { + Log_WarningPrintf("Recreating GPU device, expecting %s got %s", GPUDevice::RenderAPIToString(api), + GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI())); + } -#ifdef WITH_VULKAN - case GPURenderer::HardwareVulkan: - g_gpu = GPU::CreateHardwareVulkanRenderer(); - break; -#endif - -#ifdef _WIN32 - case GPURenderer::HardwareD3D11: - g_gpu = GPU::CreateHardwareD3D11Renderer(); - break; - case GPURenderer::HardwareD3D12: - g_gpu = GPU::CreateHardwareD3D12Renderer(); - break; -#endif - - case GPURenderer::Software: - default: - g_gpu = GPU::CreateSoftwareRenderer(); - break; + Host::ReleaseGPUDevice(); + if (!Host::CreateGPUDevice(api)) + { + Host::ReleaseRenderWindow(); + return false; + } 
} + if (renderer == GPURenderer::Software) + g_gpu = GPU::CreateSoftwareRenderer(); + else + g_gpu = GPU::CreateHardwareRenderer(); + if (!g_gpu) { Log_ErrorPrintf("Failed to initialize %s renderer, falling back to software renderer", @@ -1875,6 +1879,11 @@ bool System::CreateGPU(GPURenderer renderer) if (!g_gpu) { Log_ErrorPrintf("Failed to create fallback software renderer."); + if (!s_keep_gpu_device_on_shutdown) + { + Host::ReleaseGPUDevice(); + Host::ReleaseRenderWindow(); + } return false; } } @@ -1934,9 +1943,7 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di return false; g_gpu->RestoreGraphicsAPIState(); - const bool gpu_result = sw.DoMarker("GPU") && g_gpu->DoState(sw, host_texture, update_display); - g_gpu->ResetGraphicsAPIState(); - if (!gpu_result) + if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, host_texture, update_display)) return false; if (!sw.DoMarker("CDROM") || !CDROM::DoState(sw)) @@ -2071,8 +2078,6 @@ void System::InternalReset() #ifdef WITH_CHEEVOS Achievements::ResetRuntime(); #endif - - g_gpu->ResetGraphicsAPIState(); } std::string System::GetMediaPathFromSaveState(const char* path) @@ -2283,7 +2288,7 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 * if (screenshot_size > 0) { // assume this size is the width - const float display_aspect_ratio = g_host_display->GetDisplayAspectRatio(); + const float display_aspect_ratio = g_gpu_device->GetDisplayAspectRatio(); const u32 screenshot_width = screenshot_size; const u32 screenshot_height = std::max(1u, static_cast(static_cast(screenshot_width) / @@ -2293,7 +2298,7 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 * std::vector screenshot_buffer; u32 screenshot_stride; GPUTexture::Format screenshot_format; - if (g_host_display->RenderScreenshot(screenshot_width, screenshot_height, + if (g_gpu_device->RenderScreenshot(screenshot_width, screenshot_height, Common::Rectangle::FromExtents(0, 0, 
screenshot_width, screenshot_height), &screenshot_buffer, &screenshot_stride, &screenshot_format) && GPUTexture::ConvertTextureDataToRGBA8(screenshot_width, screenshot_height, screenshot_buffer, screenshot_stride, @@ -2306,7 +2311,7 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 * } else { - if (g_host_display->UsesLowerLeftOrigin()) + if (g_gpu_device->UsesLowerLeftOrigin()) { GPUTexture::FlipTextureDataRGBA8(screenshot_width, screenshot_height, screenshot_buffer, screenshot_stride); } @@ -2350,8 +2355,6 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 * header.data_compressed_size = static_cast(state->GetPosition() - header.offset_to_data); } - g_gpu->ResetGraphicsAPIState(); - if (!result) return false; } @@ -2427,7 +2430,7 @@ void System::UpdatePerformanceCounters() s_fps_timer.ResetTo(now_ticks); - if (g_host_display->IsGPUTimingEnabled()) + if (g_gpu_device->IsGPUTimingEnabled()) { s_average_gpu_time = s_accumulated_gpu_time / static_cast(std::max(s_presents_since_last_update, 1u)); s_gpu_usage = s_accumulated_gpu_time / (time * 10.0f); @@ -2474,7 +2477,7 @@ void System::UpdateSpeedLimiterState() s_target_speed == 1.0f && IsValid()) { float host_refresh_rate; - if (g_host_display->GetHostRefreshRate(&host_refresh_rate)) + if (g_gpu_device->GetHostRefreshRate(&host_refresh_rate)) { const float ratio = host_refresh_rate / System::GetThrottleFrequency(); s_syncing_to_host = (ratio >= 0.95f && ratio <= 1.05f); @@ -2530,8 +2533,8 @@ void System::UpdateDisplaySync() Log_VerbosePrintf("Max display fps: %f (%s)", max_display_fps, s_display_all_frames ? 
"displaying all frames" : "skipping displaying frames when needed"); - g_host_display->SetDisplayMaxFPS(max_display_fps); - g_host_display->SetVSync(video_sync_enabled); + g_gpu_device->SetDisplayMaxFPS(max_display_fps); + g_gpu_device->SetVSync(video_sync_enabled); } bool System::ShouldUseVSync() @@ -3038,10 +3041,7 @@ bool System::DumpVRAM(const char* filename) return false; g_gpu->RestoreGraphicsAPIState(); - const bool result = g_gpu->DumpVRAMToFile(filename); - g_gpu->ResetGraphicsAPIState(); - - return result; + return g_gpu->DumpVRAMToFile(filename); } bool System::DumpSPURAM(const char* filename) @@ -3492,12 +3492,12 @@ void System::CheckForSettingsChanges(const Settings& old_settings) { if (g_settings.display_post_processing && !g_settings.display_post_process_chain.empty()) { - if (!g_host_display->SetPostProcessingChain(g_settings.display_post_process_chain)) + if (!g_gpu_device->SetPostProcessingChain(g_settings.display_post_process_chain)) Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Failed to load post processing shader chain."), 20.0f); } else { - g_host_display->SetPostProcessingChain({}); + g_gpu_device->SetPostProcessingChain({}); } } } @@ -3708,8 +3708,6 @@ void System::DoRewind() s_next_frame_time += s_frame_period; - // TODO: Purge reset/restore - g_gpu->ResetGraphicsAPIState(); Host::RenderDisplay(false); g_gpu->RestoreGraphicsAPIState(); @@ -3989,7 +3987,7 @@ bool System::SaveScreenshot(const char* filename /* = nullptr */, bool full_reso return false; } - const bool screenshot_saved = g_host_display->WriteScreenshotToFile( + const bool screenshot_saved = g_gpu_device->WriteScreenshotToFile( filename, g_settings.display_internal_resolution_screenshots, compress_on_thread); if (!screenshot_saved) @@ -4340,13 +4338,13 @@ void System::TogglePostProcessing() { Host::AddKeyedOSDMessage("PostProcessing", TRANSLATE_STR("OSDMessage", "Post-processing is now enabled."), 10.0f); - if 
(!g_host_display->SetPostProcessingChain(g_settings.display_post_process_chain)) + if (!g_gpu_device->SetPostProcessingChain(g_settings.display_post_process_chain)) Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Failed to load post processing shader chain."), 20.0f); } else { Host::AddKeyedOSDMessage("PostProcessing", TRANSLATE_STR("OSDMessage", "Post-processing is now disabled."), 10.0f); - g_host_display->SetPostProcessingChain({}); + g_gpu_device->SetPostProcessingChain({}); } } @@ -4355,7 +4353,7 @@ void System::ReloadPostProcessingShaders() if (!IsValid() || !g_settings.display_post_processing) return; - if (!g_host_display->SetPostProcessingChain(g_settings.display_post_process_chain)) + if (!g_gpu_device->SetPostProcessingChain(g_settings.display_post_process_chain)) Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Failed to load post-processing shader chain."), 20.0f); else Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Post-processing shaders reloaded."), 10.0f); @@ -4421,7 +4419,7 @@ void System::UpdateSoftwareCursor() if (!IsValid()) { Host::SetMouseMode(false, false); - g_host_display->ClearSoftwareCursor(); + g_gpu_device->ClearSoftwareCursor(); return; } @@ -4444,12 +4442,12 @@ void System::UpdateSoftwareCursor() if (image && image->IsValid()) { - g_host_display->SetSoftwareCursor(image->GetPixels(), image->GetWidth(), image->GetHeight(), image->GetPitch(), + g_gpu_device->SetSoftwareCursor(image->GetPixels(), image->GetWidth(), image->GetHeight(), image->GetPitch(), image_scale); } else { - g_host_display->ClearSoftwareCursor(); + g_gpu_device->ClearSoftwareCursor(); } } @@ -4462,13 +4460,13 @@ void System::RequestDisplaySize(float scale /*= 0.0f*/) scale = g_gpu->IsHardwareRenderer() ? 
static_cast(g_settings.gpu_resolution_scale) : 1.0f; const float y_scale = - (static_cast(g_host_display->GetDisplayWidth()) / static_cast(g_host_display->GetDisplayHeight())) / - g_host_display->GetDisplayAspectRatio(); + (static_cast(g_gpu_device->GetDisplayWidth()) / static_cast(g_gpu_device->GetDisplayHeight())) / + g_gpu_device->GetDisplayAspectRatio(); const u32 requested_width = - std::max(static_cast(std::ceil(static_cast(g_host_display->GetDisplayWidth()) * scale)), 1); + std::max(static_cast(std::ceil(static_cast(g_gpu_device->GetDisplayWidth()) * scale)), 1); const u32 requested_height = std::max( - static_cast(std::ceil(static_cast(g_host_display->GetDisplayHeight()) * y_scale * scale)), 1); + static_cast(std::ceil(static_cast(g_gpu_device->GetDisplayHeight()) * y_scale * scale)), 1); Host::RequestResizeHostDisplay(static_cast(requested_width), static_cast(requested_height)); } diff --git a/src/core/system.h b/src/core/system.h index e51d94cc6..fb71020f9 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -489,16 +489,18 @@ void PumpMessagesOnCPUThread(); /// Requests a specific display window size. void RequestResizeHostDisplay(s32 width, s32 height); -/// Requests shut down and exit of the hosting application. This may not actually exit, -/// if the user cancels the shutdown confirmation. -void RequestExit(bool allow_confirm); - /// Requests shut down of the current virtual machine. void RequestSystemShutdown(bool allow_confirm, bool save_state); -/// Returns true if the hosting application is currently fullscreen. -bool IsFullscreen(); +/// Attempts to create the rendering device backend. +bool CreateGPUDevice(RenderAPI api); -/// Alters fullscreen state of hosting application. -void SetFullscreen(bool enabled); +/// Handles fullscreen transitions and such. +void UpdateDisplayWindow(); + +/// Called when the window is resized. +void ResizeDisplayWindow(s32 width, s32 height, float scale); + +/// Destroys any active rendering device. 
+void ReleaseGPUDevice(); } // namespace Host diff --git a/src/core/types.h b/src/core/types.h index 49ede88a5..3092d25f4 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -62,6 +62,9 @@ enum class GPURenderer : u8 HardwareD3D11, HardwareD3D12, #endif +#ifdef __APPLE__ + HardwareMetal, +#endif #ifdef WITH_VULKAN HardwareVulkan, #endif diff --git a/src/duckstation-nogui/CMakeLists.txt b/src/duckstation-nogui/CMakeLists.txt index 60b97ec87..08fda8927 100644 --- a/src/duckstation-nogui/CMakeLists.txt +++ b/src/duckstation-nogui/CMakeLists.txt @@ -67,14 +67,12 @@ if(USE_WAYLAND) ) endif() -if(USE_DRMKMS AND USE_EVDEV) - message(STATUS "Building VTY/DRM/KMS/EVDev NoGUI Platform.") - target_compile_definitions(duckstation-nogui PRIVATE "NOGUI_PLATFORM_VTY=1" "WITH_DRMKMS=1") - target_sources(duckstation-nogui PRIVATE - vty_key_names.h - vty_nogui_platform.cpp - vty_nogui_platform.h +if(APPLE) + message(STATUS "Building Cocoa NoGUI Platform.") + target_sources(duckstation-nogui PRIVATE + cocoa_key_names.h + cocoa_nogui_platform.mm + cocoa_nogui_platform.h ) - target_include_directories(duckstation-nogui PRIVATE ${LIBEVDEV_INCLUDE_DIRS}) - target_link_libraries(duckstation-nogui PRIVATE ${LIBEVDEV_LIBRARIES}) endif() + diff --git a/src/duckstation-nogui/cocoa_key_names.h b/src/duckstation-nogui/cocoa_key_names.h new file mode 100644 index 000000000..5e5e44828 --- /dev/null +++ b/src/duckstation-nogui/cocoa_key_names.h @@ -0,0 +1,133 @@ +#pragma once +#include "common/types.h" + +#include +#include +#include +#include +#include + +#include + +namespace CocoaKeyNames { +static const std::map s_cocoa_key_names = { + {kVK_Return, "Return"}, + {kVK_Escape, "Escape"}, + {kVK_Delete, "Backspace"}, + {kVK_Tab, "Tab"}, + {kVK_Space, "Space"}, + {kVK_ANSI_Quote, "Quote"}, + {kVK_ANSI_Comma, "Comma"}, + {kVK_ANSI_Minus, "Minus"}, + {kVK_ANSI_Period, "Period"}, + {kVK_ANSI_Slash, "Slash"}, + {kVK_ANSI_0, "0"}, + {kVK_ANSI_1, "1"}, + {kVK_ANSI_2, "2"}, + {kVK_ANSI_3, "3"}, + 
{kVK_ANSI_4, "4"}, + {kVK_ANSI_5, "5"}, + {kVK_ANSI_6, "6"}, + {kVK_ANSI_7, "7"}, + {kVK_ANSI_8, "8"}, + {kVK_ANSI_9, "9"}, + {kVK_ANSI_Semicolon, "Semicolon"}, + {kVK_ANSI_Equal, "Equal"}, + {kVK_ANSI_LeftBracket, "BracketLeft"}, + {kVK_ANSI_Backslash, "Backslash"}, + {kVK_ANSI_RightBracket, "BracketRight"}, + {kVK_ANSI_Grave, "Grave"}, + {kVK_ANSI_A, "A"}, + {kVK_ANSI_B, "B"}, + {kVK_ANSI_C, "C"}, + {kVK_ANSI_D, "D"}, + {kVK_ANSI_E, "E"}, + {kVK_ANSI_F, "F"}, + {kVK_ANSI_G, "G"}, + {kVK_ANSI_H, "H"}, + {kVK_ANSI_I, "I"}, + {kVK_ANSI_J, "J"}, + {kVK_ANSI_K, "K"}, + {kVK_ANSI_L, "L"}, + {kVK_ANSI_M, "M"}, + {kVK_ANSI_N, "N"}, + {kVK_ANSI_O, "O"}, + {kVK_ANSI_P, "P"}, + {kVK_ANSI_Q, "Q"}, + {kVK_ANSI_R, "R"}, + {kVK_ANSI_S, "S"}, + {kVK_ANSI_T, "T"}, + {kVK_ANSI_U, "U"}, + {kVK_ANSI_V, "V"}, + {kVK_ANSI_W, "W"}, + {kVK_ANSI_X, "X"}, + {kVK_ANSI_Y, "Y"}, + {kVK_ANSI_Z, "Z"}, + {kVK_CapsLock, "CapsLock"}, + {kVK_F1, "F1"}, + {kVK_F2, "F2"}, + {kVK_F3, "F3"}, + {kVK_F4, "F4"}, + {kVK_F5, "F5"}, + {kVK_F6, "F6"}, + {kVK_F7, "F7"}, + {kVK_F8, "F8"}, + {kVK_F9, "F9"}, + {kVK_F10, "F10"}, + {kVK_F11, "F11"}, + {kVK_F12, "F12"}, + {kVK_Home, "Home"}, + {kVK_PageUp, "PageUp"}, + {kVK_End, "End"}, + {kVK_PageDown, "PageDown"}, + {kVK_RightArrow, "Right"}, + {kVK_LeftArrow, "Left"}, + {kVK_DownArrow, "Down"}, + {kVK_UpArrow, "Up"}, + {kVK_ANSI_KeypadDivide, "KeypadDivide"}, + {kVK_ANSI_KeypadMultiply, "KeypadMultiply"}, + {kVK_ANSI_KeypadMinus, "KeypadMinus"}, + {kVK_ANSI_KeypadPlus, "KeypadPlus"}, + {kVK_ANSI_KeypadEnter, "KeypadReturn"}, + {kVK_ANSI_Keypad1, "Keypad1"}, + {kVK_ANSI_Keypad2, "Keypad2"}, + {kVK_ANSI_Keypad3, "Keypad3"}, + {kVK_ANSI_Keypad4, "Keypad4"}, + {kVK_ANSI_Keypad5, "Keypad5"}, + {kVK_ANSI_Keypad6, "Keypad6"}, + {kVK_ANSI_Keypad7, "Keypad7"}, + {kVK_ANSI_Keypad8, "Keypad8"}, + {kVK_ANSI_Keypad9, "Keypad9"}, + {kVK_ANSI_Keypad0, "Keypad0"}, + {kVK_ANSI_KeypadDecimal, "KeypadPeriod"}, + {kVK_F13, "F13"}, + {kVK_F14, "F14"}, + {kVK_F15, "F15"}, + {kVK_F16, 
"F16"}, + {kVK_F17, "F17"}, + {kVK_F18, "F18"}, + {kVK_F19, "F19"}, + {kVK_F20, "F20"}, + {kVK_Help, "Help"}, + {kVK_Option, "Alt"}, + {kVK_Command, "Super"}, + {kVK_Function, "Control"}, +}; + +static const char* GetKeyName(unsigned short key) +{ + const auto it = s_cocoa_key_names.find(key); + return it == s_cocoa_key_names.end() ? nullptr : it->second; +} + +static std::optional GetKeyCodeForName(const std::string_view& key_name) +{ + for (const auto& it : s_cocoa_key_names) + { + if (key_name == it.second) + return it.first; + } + + return std::nullopt; +} +} // namespace CocoaKeyNames diff --git a/src/duckstation-nogui/vty_nogui_platform.h b/src/duckstation-nogui/cocoa_nogui_platform.h similarity index 54% rename from src/duckstation-nogui/vty_nogui_platform.h rename to src/duckstation-nogui/cocoa_nogui_platform.h index bcb3104e4..5b74978d4 100644 --- a/src/duckstation-nogui/vty_nogui_platform.h +++ b/src/duckstation-nogui/cocoa_nogui_platform.h @@ -1,20 +1,43 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "nogui_platform.h" -#include -#include -#include -#include -#include -#include -class VTYNoGUIPlatform : public NoGUIPlatform +#include +#include + +#ifndef __OBJC__ +#error This file needs to be compiled with Objective C++. +#endif + +#if __has_feature(objc_arc) +#error ARC should not be enabled. 
+#endif + +#include "nogui_platform.h" + +#include + +@interface CocoaNoGUIView : NSView +- (BOOL)acceptsFirstResponder; +- (BOOL)canBecomeKeyView; +- (void)mouseDown:(NSEvent *)event; +- (void)rightMouseDown:(NSEvent *)event; +- (void)otherMouseDown:(NSEvent *)event; +- (void)mouseUp:(NSEvent *)event; +- (void)rightMouseUp:(NSEvent *)event; +- (void)otherMouseUp:(NSEvent *)event; +- (void)mouseMoved:(NSEvent *)event; +- (void)keyDown:(NSEvent *)event; +- (void)keyUp:(NSEvent *)event; +- (void)windowDidEndLiveResize:(NSNotification *)notif; +@end + +class CocoaNoGUIPlatform : public NoGUIPlatform { public: - VTYNoGUIPlatform(); - ~VTYNoGUIPlatform(); + CocoaNoGUIPlatform(); + ~CocoaNoGUIPlatform(); bool Initialize(); @@ -24,6 +47,7 @@ public: void SetDefaultConfig(SettingsInterface& si) override; bool CreatePlatformWindow(std::string title) override; + bool HasPlatformWindow() const override; void DestroyPlatformWindow() override; std::optional GetPlatformWindowInfo() override; void SetPlatformWindowTitle(std::string title) override; @@ -43,21 +67,8 @@ public: bool CopyTextToClipboard(const std::string_view& text) override; private: - void OpenEVDevFDs(); - void CloseEVDevFDs(); - void PollEvDevKeyboards(); - void SetImGuiKeyMap(); + NSWindow* m_window = nil; + float m_window_scale = 1.0f; - struct EvDevKeyboard - { - struct libevdev* obj; - int fd; - }; - - std::vector m_evdev_keyboards; - - std::deque> m_callback_queue; - std::mutex m_callback_queue_mutex; - - std::atomic_bool m_message_loop_running{false}; + std::atomic_bool m_fullscreen{false}; }; diff --git a/src/duckstation-nogui/cocoa_nogui_platform.mm b/src/duckstation-nogui/cocoa_nogui_platform.mm new file mode 100644 index 000000000..fb3b0b8a1 --- /dev/null +++ b/src/duckstation-nogui/cocoa_nogui_platform.mm @@ -0,0 +1,305 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "cocoa_nogui_platform.h" +#include "cocoa_key_names.h" 
+#include "nogui_host.h" + +#include "core/host.h" +#include "core/host_settings.h" + +#include "util/cocoa_tools.h" +#include "util/imgui_manager.h" + +#include "common/log.h" +#include "common/scoped_guard.h" +#include "common/string_util.h" +#include "common/threading.h" + +Log_SetChannel(CocoaNoGUIPlatform); + +constexpr NSWindowStyleMask WINDOWED_STYLE = NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | NSWindowStyleMaskMiniaturizable | NSWindowStyleMaskResizable; + +@implementation CocoaNoGUIView + +- (BOOL)acceptsFirstResponder { + return YES; +} +- (BOOL)canBecomeKeyView { + return YES; +} +- (void)viewDidEndLiveResize:(NSEvent *)event { + [super viewDidEndLiveResize:event]; +} +- (void)mouseDown:(NSEvent *)event { + NoGUIHost::ProcessPlatformMouseButtonEvent(0, true); +} +- (void)rightMouseDown:(NSEvent *)event { + NoGUIHost::ProcessPlatformMouseButtonEvent(1, true); +} +- (void)otherMouseDown:(NSEvent *)event { + NoGUIHost::ProcessPlatformMouseButtonEvent(static_cast(event.buttonNumber), true); +} + +- (void)mouseUp:(NSEvent *)event { + NoGUIHost::ProcessPlatformMouseButtonEvent(0, false); +} +- (void)rightMouseUp:(NSEvent *)event { + NoGUIHost::ProcessPlatformMouseButtonEvent(1, false); +} +- (void)otherMouseUp:(NSEvent *)event { + NoGUIHost::ProcessPlatformMouseButtonEvent(static_cast(event.buttonNumber), false); +} + +- (void)mouseMoved:(NSEvent *)event { + // Flip for lower-left origin. 
+ const NSView* contentView = self; + const NSPoint pt = [contentView convertPointToBacking:[event locationInWindow]]; + const NSSize size = [contentView convertSizeToBacking:contentView.frame.size]; + const float local_x = pt.x; + const float local_y = size.height - pt.y; + NoGUIHost::ProcessPlatformMouseMoveEvent(local_x, local_y); +} + +- (void)keyDown:(NSEvent *)event { + [super keyDown:event]; + if (ImGuiManager::WantsTextInput() && event.characters && event.characters.length > 0) + { + ImGuiManager::AddTextInput([event.characters UTF8String]); + } + + if (!event.isARepeat) + NoGUIHost::ProcessPlatformKeyEvent(static_cast(event.keyCode), true); +} + +- (void)keyUp:(NSEvent *)event { + [super keyUp:event]; + NoGUIHost::ProcessPlatformKeyEvent(static_cast(event.keyCode), false); +} + +- (void)windowDidEndLiveResize:(NSNotification *)notif +{ + const NSSize size = [self convertSizeToBacking:self.frame.size]; + NoGUIHost::ProcessPlatformWindowResize(static_cast(size.width), static_cast(size.height), 1.0f); +} +@end + +CocoaNoGUIPlatform::CocoaNoGUIPlatform() = default; + +CocoaNoGUIPlatform::~CocoaNoGUIPlatform() +{ + if (m_window) + { + [m_window release]; + m_window = nil; + } +} + +bool CocoaNoGUIPlatform::Initialize() +{ + [NSApplication sharedApplication]; + + // Needed for keyboard input. 
+ const ProcessSerialNumber psn = {0, kCurrentProcess}; + TransformProcessType(&psn, kProcessTransformToForegroundApplication); + return true; +} + +void CocoaNoGUIPlatform::ReportError(const std::string_view& title, const std::string_view& message) +{ + if (![NSThread isMainThread]) + { + dispatch_sync(dispatch_get_main_queue(), [this, &title, &message]() { ReportError(title, message); }); + return; + } + + @autoreleasepool { + NSAlert *alert = [[[NSAlert alloc] init] autorelease]; + [alert setMessageText: CocoaTools::StringViewToNSString(title)]; + [alert setInformativeText: CocoaTools::StringViewToNSString(message)]; + [alert runModal]; + } +} + +bool CocoaNoGUIPlatform::ConfirmMessage(const std::string_view& title, const std::string_view& message) +{ + if (![NSThread isMainThread]) + { + bool result = false; + dispatch_sync(dispatch_get_main_queue(), [this, &title, &message, &result]() { result = ConfirmMessage(title, message); }); + return result; + } + + @autoreleasepool { + NSAlert *alert = [[[NSAlert alloc] init] autorelease]; + [alert setMessageText: CocoaTools::StringViewToNSString(title)]; + [alert setInformativeText: CocoaTools::StringViewToNSString(message)]; + [alert addButtonWithTitle:@"Yes"]; + [alert addButtonWithTitle:@"No"]; + return ([alert runModal] == NSAlertFirstButtonReturn); // "Yes" is the first button added, so it is reported as NSAlertFirstButtonReturn. + } +} + +void CocoaNoGUIPlatform::SetDefaultConfig(SettingsInterface& si) +{ + // noop +} + +bool CocoaNoGUIPlatform::CreatePlatformWindow(std::string title) +{ + @autoreleasepool { + s32 window_x, window_y, window_width, window_height; + const bool has_window_geom = NoGUIHost::GetSavedPlatformWindowGeometry(&window_x, &window_y, &window_width, &window_height); + if (!has_window_geom) + { + window_width = DEFAULT_WINDOW_WIDTH; + window_height = DEFAULT_WINDOW_HEIGHT; + } + + m_window = [[NSWindow alloc] initWithContentRect:NSMakeRect(0.0f, 0.0f, static_cast(window_width), static_cast(window_height)) + styleMask:WINDOWED_STYLE + backing:NSBackingStoreBuffered defer:YES]; + + CocoaNoGUIView* 
view = [[[CocoaNoGUIView alloc] init] autorelease]; + [m_window setDelegate:view]; + [m_window setContentView:view]; + + if (!has_window_geom) + [m_window center]; + else + [m_window setFrameOrigin:NSMakePoint(static_cast(window_x), static_cast(window_y))]; + + [m_window setTitle: [NSString stringWithUTF8String:title.c_str()]]; + [m_window setAcceptsMouseMovedEvents:YES]; + [m_window setReleasedWhenClosed:NO]; + [m_window setIsVisible:TRUE]; + [m_window makeKeyAndOrderFront:nil]; + } + + if (m_fullscreen.load(std::memory_order_acquire)) + SetFullscreen(true); + + return true; +} + +bool CocoaNoGUIPlatform::HasPlatformWindow() const +{ + return (m_window != NULL); +} + +void CocoaNoGUIPlatform::DestroyPlatformWindow() +{ + if (m_window == nil) + return; + + const CGPoint frame_origin = m_window.frame.origin; + const CGSize content_size = m_window.contentView.frame.size; + + if (!m_fullscreen.load(std::memory_order_acquire)) + { + NoGUIHost::SavePlatformWindowGeometry(static_cast(frame_origin.x), static_cast(frame_origin.y), + static_cast(content_size.width), static_cast(content_size.height)); + } + + [m_window close]; + [m_window release]; + m_window = nil; +} + +std::optional CocoaNoGUIPlatform::GetPlatformWindowInfo() +{ + if (m_window == nil) + return std::nullopt; + + NSView* contentView = [m_window contentView]; + const NSSize size = [contentView convertSizeToBacking:contentView.frame.size]; + + WindowInfo wi; + wi.surface_width = static_cast(size.width); + wi.surface_height = static_cast(size.height); + wi.surface_scale = m_window_scale; + wi.type = WindowInfo::Type::MacOS; + wi.window_handle = static_cast(m_window.contentView); + return wi; +} + +void CocoaNoGUIPlatform::SetPlatformWindowTitle(std::string title) +{ + dispatch_async(dispatch_get_main_queue(), [this, title = std::move(title)]() { + if (!m_window) + return; + + @autoreleasepool { + [m_window setTitle: [NSString stringWithUTF8String:title.c_str()]]; + } + }); +} + +std::optional 
CocoaNoGUIPlatform::ConvertHostKeyboardStringToCode(const std::string_view& str) +{ + std::optional converted(CocoaKeyNames::GetKeyCodeForName(str)); + return converted.has_value() ? std::optional(static_cast(converted.value())) : std::nullopt; +} + +std::optional CocoaNoGUIPlatform::ConvertHostKeyboardCodeToString(u32 code) +{ + const char* converted = CocoaKeyNames::GetKeyName(static_cast(code)); + return converted ? std::optional(converted) : std::nullopt; +} + +void CocoaNoGUIPlatform::RunMessageLoop() +{ + [NSApp run]; +} + +void CocoaNoGUIPlatform::ExecuteInMessageLoop(std::function func) +{ + dispatch_async(dispatch_get_main_queue(), [func = std::move(func)]() { + func(); + }); +} + +void CocoaNoGUIPlatform::QuitMessageLoop() +{ + [NSApp stop:nil]; +} + +void CocoaNoGUIPlatform::SetFullscreen(bool enabled) +{ + Log_ErrorPrint("SetFullscreen() not implemented."); +} + +bool CocoaNoGUIPlatform::RequestRenderWindowSize(s32 new_window_width, s32 new_window_height) +{ + dispatch_async(dispatch_get_main_queue(), [this, new_window_width, new_window_height]() { + if (!m_window) + return; + + @autoreleasepool { + [m_window setContentSize:NSMakeSize(static_cast(new_window_width), static_cast(new_window_height))]; + } + }); + + return true; +} + +bool CocoaNoGUIPlatform::OpenURL(const std::string_view& url) +{ + Log_ErrorPrint("OpenURL() not implemented."); + return false; +} + +bool CocoaNoGUIPlatform::CopyTextToClipboard(const std::string_view& text) +{ + Log_ErrorPrint("CopyTextToClipboard() not implemented."); + return false; +} + +std::unique_ptr NoGUIPlatform::CreateCocoaPlatform() +{ + std::unique_ptr ret(new CocoaNoGUIPlatform()); + if (!ret->Initialize()) + return {}; + + return ret; +} diff --git a/src/duckstation-nogui/duckstation-nogui.vcxproj b/src/duckstation-nogui/duckstation-nogui.vcxproj index c55bd2d1f..71085741d 100644 --- a/src/duckstation-nogui/duckstation-nogui.vcxproj +++ b/src/duckstation-nogui/duckstation-nogui.vcxproj @@ -3,9 +3,6 @@ - - true 
- true @@ -18,12 +15,6 @@ - - true - - - true - true diff --git a/src/duckstation-nogui/duckstation-nogui.vcxproj.filters b/src/duckstation-nogui/duckstation-nogui.vcxproj.filters index 1952e3cf8..db825fb69 100644 --- a/src/duckstation-nogui/duckstation-nogui.vcxproj.filters +++ b/src/duckstation-nogui/duckstation-nogui.vcxproj.filters @@ -3,7 +3,6 @@ - @@ -11,8 +10,6 @@ - - diff --git a/src/duckstation-nogui/nogui_host.cpp b/src/duckstation-nogui/nogui_host.cpp index e3e98ad11..4b3272e83 100644 --- a/src/duckstation-nogui/nogui_host.cpp +++ b/src/duckstation-nogui/nogui_host.cpp @@ -17,7 +17,7 @@ #include "core/settings.h" #include "core/system.h" -#include "util/host_display.h" +#include "util/gpu_device.h" #include "util/imgui_manager.h" #include "util/ini_settings_interface.h" #include "util/input_manager.h" @@ -63,6 +63,9 @@ std::unique_ptr g_nogui_window; // Local function declarations ////////////////////////////////////////////////////////////////////////// namespace NoGUIHost { +/// Starts the virtual machine. 
+static void StartSystem(SystemBootParameters params); + static bool ParseCommandLineParametersAndInitializeConfig(int argc, char* argv[], std::optional& autoboot); static void PrintCommandLineVersion(); @@ -88,8 +91,6 @@ static void UpdateWindowTitle(const std::string& game_title); static void CancelAsyncOp(); static void StartAsyncOp(std::function callback); static void AsyncOpThreadEntryPoint(std::function callback); -static bool AcquireHostDisplay(RenderAPI api); -static void ReleaseHostDisplay(); } // namespace NoGUIHost ////////////////////////////////////////////////////////////////////////// @@ -102,7 +103,7 @@ static bool s_save_state_on_shutdown = false; static bool s_was_paused_by_focus_loss = false; static Threading::Thread s_cpu_thread; -static Threading::KernelSemaphore s_host_display_created_or_destroyed; +static Threading::KernelSemaphore s_platform_window_updated; static std::atomic_bool s_running{false}; static std::mutex s_cpu_thread_events_mutex; static std::condition_variable s_cpu_thread_event_done; @@ -159,7 +160,7 @@ void NoGUIHost::SetAppRoot() void NoGUIHost::SetResourcesDirectory() { -#ifndef __APPLE__ +#ifndef __APPLE__NOT_USED // Not using bundles yet. // On Windows/Linux, these are in the binary directory. 
EmuFolders::Resources = Path::Combine(EmuFolders::AppRoot, "resources"); #else @@ -423,8 +424,7 @@ void NoGUIHost::StartSystem(SystemBootParameters params) void NoGUIHost::ProcessPlatformWindowResize(s32 width, s32 height, float scale) { Host::RunOnCPUThread([width, height, scale]() { - // TODO: Scale - g_host_display->ResizeWindow(width, height); + g_gpu_device->ResizeWindow(width, height, scale); ImGuiManager::WindowResized(); System::HostDisplayResized(); }); @@ -432,8 +432,8 @@ void NoGUIHost::ProcessPlatformWindowResize(s32 width, s32 height, float scale) void NoGUIHost::ProcessPlatformMouseMoveEvent(float x, float y) { - if (g_host_display) - g_host_display->SetMousePosition(static_cast(x), static_cast(y)); + if (g_gpu_device) + g_gpu_device->SetMousePosition(static_cast(x), static_cast(y)); InputManager::UpdatePointerAbsolutePosition(0, x, y); ImGuiManager::UpdateMousePosition(x, y); @@ -616,8 +616,8 @@ void NoGUIHost::CPUThreadEntryPoint() // input source setup must happen on emu thread CommonHost::Initialize(); - // start the GS thread up and get it going - if (AcquireHostDisplay(Settings::GetRenderAPIForRenderer(g_settings.gpu_renderer))) + // start the fullscreen UI and get it going + if (Host::CreateGPUDevice(Settings::GetRenderAPIForRenderer(g_settings.gpu_renderer)) && FullscreenUI::Initialize()) { // kick a game list refresh if we're not in batch mode if (!InBatchMode()) @@ -637,7 +637,8 @@ void NoGUIHost::CPUThreadEntryPoint() if (System::IsValid()) System::ShutdownSystem(false); - ReleaseHostDisplay(); + Host::ReleaseGPUDevice(); + Host::ReleaseRenderWindow(); CommonHost::Shutdown(); g_nogui_window->QuitMessageLoop(); @@ -655,58 +656,35 @@ void NoGUIHost::CPUThreadMainLoop() Host::PumpMessagesOnCPUThread(); Host::RenderDisplay(false); - if (!g_host_display->IsVsyncEnabled()) - g_host_display->ThrottlePresentation(); + if (!g_gpu_device->IsVsyncEnabled()) + g_gpu_device->ThrottlePresentation(); } } -bool NoGUIHost::AcquireHostDisplay(RenderAPI api) 
+std::optional Host::AcquireRenderWindow(bool recreate_window) { - Assert(!g_host_display); + std::optional wi; - g_nogui_window->ExecuteInMessageLoop([api]() { - if (g_nogui_window->CreatePlatformWindow(GetWindowTitle(System::GetGameTitle()))) + g_nogui_window->ExecuteInMessageLoop([&wi, recreate_window]() { + bool res = g_nogui_window->HasPlatformWindow(); + if (!res || recreate_window) { - const std::optional wi(g_nogui_window->GetPlatformWindowInfo()); - if (wi.has_value()) - { - g_host_display = Host::CreateDisplayForAPI(api); - if (g_host_display && !g_host_display->CreateDevice(wi.value(), System::ShouldUseVSync())) - g_host_display.reset(); - } - - if (g_host_display) - g_host_display->DoneCurrent(); - else + if (res) g_nogui_window->DestroyPlatformWindow(); - } - s_host_display_created_or_destroyed.Post(); + res = g_nogui_window->CreatePlatformWindow(NoGUIHost::GetWindowTitle(System::GetGameTitle())); + } + if (res) + wi = g_nogui_window->GetPlatformWindowInfo(); + s_platform_window_updated.Post(); }); - s_host_display_created_or_destroyed.Wait(); + s_platform_window_updated.Wait(); - if (!g_host_display) + if (!wi.has_value()) { - g_nogui_window->ReportError("Error", "Failed to create host display."); - return false; - } - - if (!g_host_display->MakeCurrent() || !g_host_display->SetupDevice() || !ImGuiManager::Initialize() || - !CommonHost::CreateHostDisplayResources()) - { - ImGuiManager::Shutdown(); - CommonHost::ReleaseHostDisplayResources(); - g_host_display.reset(); - g_nogui_window->DestroyPlatformWindow(); - return false; - } - - if (!FullscreenUI::Initialize()) - { - g_nogui_window->ReportError("Error", "Failed to initialize fullscreen UI"); - ReleaseHostDisplay(); - return false; + g_nogui_window->ReportError("Error", "Failed to create render window."); + return std::nullopt; } // reload input sources, since it might use the window handle @@ -714,44 +692,18 @@ bool NoGUIHost::AcquireHostDisplay(RenderAPI api) auto lock = Host::GetSettingsLock(); 
InputManager::ReloadSources(*Host::GetSettingsInterface(), lock); } - return true; + + return wi; } -bool Host::AcquireHostDisplay(RenderAPI api) +void Host::ReleaseRenderWindow() { - if (g_host_display && g_host_display->GetRenderAPI() == api) - { - // current is fine - return true; - } - - // otherwise we need to switch - NoGUIHost::ReleaseHostDisplay(); - return NoGUIHost::AcquireHostDisplay(api); -} - -void NoGUIHost::ReleaseHostDisplay() -{ - if (!g_host_display) - return; - - // close input sources, since it might use the window handle - InputManager::CloseSources(); - - CommonHost::ReleaseHostDisplayResources(); - FullscreenUI::Shutdown(); - ImGuiManager::Shutdown(); - g_host_display.reset(); + // Need to block here, otherwise the recreation message associates with the old window. g_nogui_window->ExecuteInMessageLoop([]() { g_nogui_window->DestroyPlatformWindow(); - s_host_display_created_or_destroyed.Post(); + s_platform_window_updated.Post(); }); - s_host_display_created_or_destroyed.Wait(); -} - -void Host::ReleaseHostDisplay() -{ - // we keep the fsui going, so no need to do anything here + s_platform_window_updated.Wait(); } void Host::OnSystemStarting() @@ -786,38 +738,10 @@ void Host::OnSystemDestroyed() Log_VerbosePrintf("Host::OnSystemDestroyed()"); } -void Host::InvalidateDisplay() +void Host::BeginPresentFrame() { - RenderDisplay(false); } -void Host::RenderDisplay(bool skip_present) -{ - // acquire for IO.MousePos. - std::atomic_thread_fence(std::memory_order_acquire); - - if (!skip_present) - { - FullscreenUI::Render(); - ImGuiManager::RenderTextOverlays(); - ImGuiManager::RenderOSDMessages(); - } - - // Debug windows are always rendered, otherwise mouse input breaks on skip. 
- ImGuiManager::RenderOverlayWindows(); - ImGuiManager::RenderDebugWindows(); - - g_host_display->Render(skip_present); - - ImGuiManager::NewFrame(); -} - -// void Host::ResizeHostDisplay(u32 new_window_width, u32 new_window_height, float new_window_scale) -// { -// s_host_display->ResizeRenderWindow(new_window_width, new_window_height, new_window_scale); -// ImGuiManager::WindowResized(); -// } - void Host::RequestResizeHostDisplay(s32 width, s32 height) { g_nogui_window->RequestRenderWindowSize(width, height); @@ -879,7 +803,7 @@ std::unique_ptr NoGUIHost::CreatePlatform() #if defined(_WIN32) ret = NoGUIPlatform::CreateWin32Platform(); #elif defined(__APPLE__) - // nothing yet + ret = NoGUIPlatform::CreateCocoaPlatform(); #else // linux const char* platform = std::getenv("DUCKSTATION_NOGUI_PLATFORM"); @@ -891,10 +815,6 @@ std::unique_ptr NoGUIHost::CreatePlatform() if (!ret && (!platform || StringUtil::Strcasecmp(platform, "x11") == 0) && std::getenv("DISPLAY")) ret = NoGUIPlatform::CreateX11Platform(); #endif -#ifdef NOGUI_PLATFORM_VTY - if (!ret && (!platform || StringUtil::Strcasecmp(platform, "vty") == 0)) - ret = NoGUIPlatform::CreateVTYPlatform(); -#endif #endif return ret; diff --git a/src/duckstation-nogui/nogui_host.h b/src/duckstation-nogui/nogui_host.h index c572a8fa4..7087bded4 100644 --- a/src/duckstation-nogui/nogui_host.h +++ b/src/duckstation-nogui/nogui_host.h @@ -3,7 +3,7 @@ #pragma once #include "common/types.h" -#include "core/system.h" + #include #include @@ -12,9 +12,6 @@ namespace NoGUIHost { bool InBatchMode(); void SetBatchMode(bool enabled); -/// Starts the virtual machine. -void StartSystem(SystemBootParameters params); - /// Returns the application name and version, optionally including debug/devel config indicator. 
std::string GetAppNameAndVersion(); @@ -36,4 +33,4 @@ void PlatformWindowFocusLost(); void PlatformDevicesChanged(); bool GetSavedPlatformWindowGeometry(s32* x, s32* y, s32* width, s32* height); void SavePlatformWindowGeometry(s32 x, s32 y, s32 width, s32 height); -} // namespace NoGUIHost \ No newline at end of file +} // namespace NoGUIHost diff --git a/src/duckstation-nogui/nogui_platform.h b/src/duckstation-nogui/nogui_platform.h index cb79527ce..2810d2e68 100644 --- a/src/duckstation-nogui/nogui_platform.h +++ b/src/duckstation-nogui/nogui_platform.h @@ -3,8 +3,10 @@ #pragma once +#include "util/gpu_device.h" + #include "common/types.h" -#include "util/host_display.h" + #include #include #include @@ -24,6 +26,7 @@ public: virtual void SetDefaultConfig(SettingsInterface& si) = 0; virtual bool CreatePlatformWindow(std::string title) = 0; + virtual bool HasPlatformWindow() const = 0; virtual void DestroyPlatformWindow() = 0; virtual std::optional GetPlatformWindowInfo() = 0; @@ -46,20 +49,19 @@ public: #ifdef _WIN32 static std::unique_ptr CreateWin32Platform(); #endif - +#ifdef __APPLE__ + static std::unique_ptr CreateCocoaPlatform(); +#endif #ifdef NOGUI_PLATFORM_WAYLAND static std::unique_ptr CreateWaylandPlatform(); #endif #ifdef NOGUI_PLATFORM_X11 static std::unique_ptr CreateX11Platform(); #endif -#ifdef NOGUI_PLATFORM_VTY - static std::unique_ptr CreateVTYPlatform(); -#endif protected: static constexpr s32 DEFAULT_WINDOW_WIDTH = 1280; static constexpr s32 DEFAULT_WINDOW_HEIGHT = 720; }; -extern std::unique_ptr g_nogui_window; \ No newline at end of file +extern std::unique_ptr g_nogui_window; diff --git a/src/duckstation-nogui/vty_key_names.h b/src/duckstation-nogui/vty_key_names.h deleted file mode 100644 index 2054acc03..000000000 --- a/src/duckstation-nogui/vty_key_names.h +++ /dev/null @@ -1,281 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include 
"common/string.h" -#include "common/types.h" -#include -#include -#include -#include -#include -#include - -namespace VTYKeyNames { - -static const std::map s_evdev_key_names = {{KEY_ESC, "Escape"}, - {KEY_1, "1"}, - {KEY_2, "2"}, - {KEY_3, "3"}, - {KEY_4, "4"}, - {KEY_5, "5"}, - {KEY_6, "6"}, - {KEY_7, "7"}, - {KEY_8, "8"}, - {KEY_9, "9"}, - {KEY_0, "0"}, - {KEY_MINUS, "Minus"}, - {KEY_EQUAL, "Equal"}, - {KEY_BACKSPACE, "Backspace"}, - {KEY_TAB, "Tab"}, - {KEY_Q, "Q"}, - {KEY_W, "W"}, - {KEY_E, "E"}, - {KEY_R, "R"}, - {KEY_T, "T"}, - {KEY_Y, "Y"}, - {KEY_U, "U"}, - {KEY_I, "I"}, - {KEY_O, "O"}, - {KEY_P, "P"}, - {KEY_LEFTBRACE, "Leftbrace"}, - {KEY_RIGHTBRACE, "Rightbrace"}, - {KEY_ENTER, "Return"}, - {KEY_LEFTCTRL, "Leftctrl"}, - {KEY_A, "A"}, - {KEY_S, "S"}, - {KEY_D, "D"}, - {KEY_F, "F"}, - {KEY_G, "G"}, - {KEY_H, "H"}, - {KEY_J, "J"}, - {KEY_K, "K"}, - {KEY_L, "L"}, - {KEY_SEMICOLON, "Semicolon"}, - {KEY_APOSTROPHE, "Apostrophe"}, - {KEY_GRAVE, "Grave"}, - {KEY_LEFTSHIFT, "Leftshift"}, - {KEY_BACKSLASH, "Backslash"}, - {KEY_Z, "Z"}, - {KEY_X, "X"}, - {KEY_C, "C"}, - {KEY_V, "V"}, - {KEY_B, "B"}, - {KEY_N, "N"}, - {KEY_M, "M"}, - {KEY_COMMA, "Comma"}, - {KEY_DOT, "Dot"}, - {KEY_SLASH, "Slash"}, - {KEY_RIGHTSHIFT, "Rightshift"}, - {KEY_KPASTERISK, "Kpasterisk"}, - {KEY_LEFTALT, "Leftalt"}, - {KEY_SPACE, "Space"}, - {KEY_CAPSLOCK, "Capslock"}, - {KEY_F1, "F1"}, - {KEY_F2, "F2"}, - {KEY_F3, "F3"}, - {KEY_F4, "F4"}, - {KEY_F5, "F5"}, - {KEY_F6, "F6"}, - {KEY_F7, "F7"}, - {KEY_F8, "F8"}, - {KEY_F9, "F9"}, - {KEY_F10, "F10"}, - {KEY_NUMLOCK, "Numlock"}, - {KEY_SCROLLLOCK, "Scrolllock"}, - {KEY_KP7, "Kp7"}, - {KEY_KP8, "Kp8"}, - {KEY_KP9, "Kp9"}, - {KEY_KPMINUS, "Kpminus"}, - {KEY_KP4, "Kp4"}, - {KEY_KP5, "Kp5"}, - {KEY_KP6, "Kp6"}, - {KEY_KPPLUS, "Kpplus"}, - {KEY_KP1, "Kp1"}, - {KEY_KP2, "Kp2"}, - {KEY_KP3, "Kp3"}, - {KEY_KP0, "Kp0"}, - {KEY_KPDOT, "Kpdot"}, - {KEY_ZENKAKUHANKAKU, "Zenkakuhankaku"}, - {KEY_102ND, "102nd"}, - {KEY_F11, "F11"}, - {KEY_F12, "F12"}, - 
{KEY_RO, "Ro"}, - {KEY_KATAKANA, "Katakana"}, - {KEY_HIRAGANA, "Hiragana"}, - {KEY_HENKAN, "Henkan"}, - {KEY_KATAKANAHIRAGANA, "Katakanahiragana"}, - {KEY_MUHENKAN, "Muhenkan"}, - {KEY_KPJPCOMMA, "Kpjpcomma"}, - {KEY_KPENTER, "Kpenter"}, - {KEY_RIGHTCTRL, "Rightctrl"}, - {KEY_KPSLASH, "Kpslash"}, - {KEY_SYSRQ, "Sysrq"}, - {KEY_RIGHTALT, "RightAlt"}, - {KEY_LINEFEED, "Linefeed"}, - {KEY_HOME, "Home"}, - {KEY_UP, "Up"}, - {KEY_PAGEUP, "PageUp"}, - {KEY_LEFT, "Left"}, - {KEY_RIGHT, "Right"}, - {KEY_END, "End"}, - {KEY_DOWN, "Down"}, - {KEY_PAGEDOWN, "PageDown"}, - {KEY_INSERT, "Insert"}, - {KEY_DELETE, "Delete"}, - {KEY_MACRO, "Macro"}, - {KEY_MUTE, "Mute"}, - {KEY_VOLUMEDOWN, "VolumeDown"}, - {KEY_VOLUMEUP, "VolumeUp"}, - {KEY_POWER, "Power"}, - {KEY_KPEQUAL, "Kpequal"}, - {KEY_KPPLUSMINUS, "Kpplusminus"}, - {KEY_PAUSE, "Pause"}, - {KEY_SCALE, "Scale"}, - {KEY_KPCOMMA, "Kpcomma"}, - {KEY_HANGEUL, "Hangeul"}, - {KEY_HANGUEL, "Hanguel"}, - {KEY_HANJA, "Hanja"}, - {KEY_YEN, "Yen"}, - {KEY_LEFTMETA, "Leftmeta"}, - {KEY_RIGHTMETA, "Rightmeta"}, - {KEY_COMPOSE, "Compose"}, - {KEY_STOP, "Stop"}, - {KEY_AGAIN, "Again"}, - {KEY_PROPS, "Props"}, - {KEY_UNDO, "Undo"}, - {KEY_FRONT, "Front"}, - {KEY_COPY, "Copy"}, - {KEY_OPEN, "Open"}, - {KEY_PASTE, "Paste"}, - {KEY_FIND, "Find"}, - {KEY_CUT, "Cut"}, - {KEY_HELP, "Help"}, - {KEY_MENU, "Menu"}, - {KEY_CALC, "Calc"}, - {KEY_SETUP, "Setup"}, - {KEY_SLEEP, "Sleep"}, - {KEY_WAKEUP, "Wakeup"}, - {KEY_FILE, "File"}, - {KEY_SENDFILE, "Sendfile"}, - {KEY_DELETEFILE, "Deletefile"}, - {KEY_XFER, "Xfer"}, - {KEY_PROG1, "Prog1"}, - {KEY_PROG2, "Prog2"}, - {KEY_WWW, "Www"}, - {KEY_MSDOS, "Msdos"}, - {KEY_COFFEE, "Coffee"}, - {KEY_SCREENLOCK, "Screenlock"}, - {KEY_ROTATE_DISPLAY, "Rotate_display"}, - {KEY_DIRECTION, "Direction"}, - {KEY_CYCLEWINDOWS, "Cyclewindows"}, - {KEY_MAIL, "Mail"}, - {KEY_BOOKMARKS, "Bookmarks"}, - {KEY_COMPUTER, "Computer"}, - {KEY_BACK, "Back"}, - {KEY_FORWARD, "Forward"}, - {KEY_CLOSECD, "Closecd"}, - {KEY_EJECTCD, 
"Ejectcd"}, - {KEY_EJECTCLOSECD, "Ejectclosecd"}, - {KEY_NEXTSONG, "Nextsong"}, - {KEY_PLAYPAUSE, "Playpause"}, - {KEY_PREVIOUSSONG, "Previoussong"}, - {KEY_STOPCD, "Stopcd"}, - {KEY_RECORD, "Record"}, - {KEY_REWIND, "Rewind"}, - {KEY_PHONE, "Phone"}, - {KEY_ISO, "Iso"}, - {KEY_CONFIG, "Config"}, - {KEY_HOMEPAGE, "Homepage"}, - {KEY_REFRESH, "Refresh"}, - {KEY_EXIT, "Exit"}, - {KEY_MOVE, "Move"}, - {KEY_EDIT, "Edit"}, - {KEY_SCROLLUP, "Scrollup"}, - {KEY_SCROLLDOWN, "Scrolldown"}, - {KEY_KPLEFTPAREN, "Kpleftparen"}, - {KEY_KPRIGHTPAREN, "Kprightparen"}, - {KEY_NEW, "New"}, - {KEY_REDO, "Redo"}, - {KEY_F13, "F13"}, - {KEY_F14, "F14"}, - {KEY_F15, "F15"}, - {KEY_F16, "F16"}, - {KEY_F17, "F17"}, - {KEY_F18, "F18"}, - {KEY_F19, "F19"}, - {KEY_F20, "F20"}, - {KEY_F21, "F21"}, - {KEY_F22, "F22"}, - {KEY_F23, "F23"}, - {KEY_F24, "F24"}, - {KEY_PLAYCD, "Playcd"}, - {KEY_PAUSECD, "Pausecd"}, - {KEY_PROG3, "Prog3"}, - {KEY_PROG4, "Prog4"}, - {KEY_DASHBOARD, "Dashboard"}, - {KEY_SUSPEND, "Suspend"}, - {KEY_CLOSE, "Close"}, - {KEY_PLAY, "Play"}, - {KEY_FASTFORWARD, "Fastforward"}, - {KEY_BASSBOOST, "Bassboost"}, - {KEY_PRINT, "Print"}, - {KEY_HP, "Hp"}, - {KEY_CAMERA, "Camera"}, - {KEY_SOUND, "Sound"}, - {KEY_QUESTION, "Question"}, - {KEY_EMAIL, "Email"}, - {KEY_CHAT, "Chat"}, - {KEY_SEARCH, "Search"}, - {KEY_CONNECT, "Connect"}, - {KEY_FINANCE, "Finance"}, - {KEY_SPORT, "Sport"}, - {KEY_SHOP, "Shop"}, - {KEY_ALTERASE, "Alterase"}, - {KEY_CANCEL, "Cancel"}, - {KEY_BRIGHTNESSDOWN, "Brightnessdown"}, - {KEY_BRIGHTNESSUP, "Brightnessup"}, - {KEY_MEDIA, "Media"}, - {KEY_SWITCHVIDEOMODE, "Switchvideomode"}, - {KEY_KBDILLUMTOGGLE, "Kbdillumtoggle"}, - {KEY_KBDILLUMDOWN, "Kbdillumdown"}, - {KEY_KBDILLUMUP, "Kbdillumup"}, - {KEY_SEND, "Send"}, - {KEY_REPLY, "Reply"}, - {KEY_FORWARDMAIL, "Forwardmail"}, - {KEY_SAVE, "Save"}, - {KEY_DOCUMENTS, "Documents"}, - {KEY_BATTERY, "Battery"}, - {KEY_BLUETOOTH, "Bluetooth"}, - {KEY_WLAN, "Wlan"}, - {KEY_UWB, "Uwb"}, - {KEY_UNKNOWN, "Unknown"}, - 
{KEY_VIDEO_NEXT, "Video_next"}, - {KEY_VIDEO_PREV, "Video_prev"}, - {KEY_BRIGHTNESS_CYCLE, "Brightness_cycle"}, - {KEY_BRIGHTNESS_AUTO, "Brightness_auto"}, - {KEY_BRIGHTNESS_ZERO, "Brightness_zero"}, - {KEY_DISPLAY_OFF, "Display_off"}, - {KEY_WWAN, "Wwan"}, - {KEY_WIMAX, "Wimax"}, - {KEY_RFKILL, "Rfkill"}, - {KEY_MICMUTE, "Micmute"}}; - -static inline const char* GetKeyName(int key) -{ - const auto it = s_evdev_key_names.find(key); - return it == s_evdev_key_names.end() ? nullptr : it->second; -} - -static inline std::optional GetKeyCodeForName(const std::string_view key_name) -{ - for (const auto& it : s_evdev_key_names) - { - if (key_name == it.second) - return it.first; - } - - return std::nullopt; -} - -} // namespace VTYKeyNames diff --git a/src/duckstation-nogui/vty_nogui_platform.cpp b/src/duckstation-nogui/vty_nogui_platform.cpp deleted file mode 100644 index 3c394dab6..000000000 --- a/src/duckstation-nogui/vty_nogui_platform.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "vty_nogui_platform.h" -#include "common/log.h" -#include "common/string_util.h" -#include "common/threading.h" -#include "core/host.h" -#include "core/host_settings.h" -#include "nogui_host.h" -#include "resource.h" -#include "vty_key_names.h" -#include -#include -#include -#include -Log_SetChannel(VTYNoGUIPlatform); - -#ifdef WITH_DRMKMS -#include "common/drm_display.h" -#endif - -VTYNoGUIPlatform::VTYNoGUIPlatform() -{ - m_message_loop_running.store(true, std::memory_order_release); -} - -VTYNoGUIPlatform::~VTYNoGUIPlatform() -{ - CloseEVDevFDs(); -} - -std::unique_ptr NoGUIPlatform::CreateVTYPlatform() -{ - std::unique_ptr platform(std::make_unique()); - if (!platform->Initialize()) - platform.reset(); - return platform; -} - -bool VTYNoGUIPlatform::Initialize() -{ - OpenEVDevFDs(); - return true; -} - -void VTYNoGUIPlatform::ReportError(const std::string_view& title, 
const std::string_view& message) -{ - // not implemented -} - -bool VTYNoGUIPlatform::ConfirmMessage(const std::string_view& title, const std::string_view& message) -{ - // not implemented - return true; -} - -void VTYNoGUIPlatform::SetDefaultConfig(SettingsInterface& si) -{ - // noop -} - -bool VTYNoGUIPlatform::CreatePlatformWindow(std::string title) -{ - return true; -} - -void VTYNoGUIPlatform::DestroyPlatformWindow() -{ - // noop -} - -std::optional VTYNoGUIPlatform::GetPlatformWindowInfo() -{ - WindowInfo wi; - wi.type = WindowInfo::Type::Display; - wi.surface_width = 0; - wi.surface_height = 0; - wi.surface_refresh_rate = 0.0f; - wi.surface_format = WindowInfo::SurfaceFormat::Auto; - - const std::string fullscreen_mode = Host::GetStringSettingValue("GPU", "FullscreenMode", ""); - if (!fullscreen_mode.empty()) - { - if (!HostDisplay::ParseFullscreenMode(fullscreen_mode, &wi.surface_width, &wi.surface_height, - &wi.surface_refresh_rate)) - { - Log_ErrorPrintf("Failed to parse fullscreen mode '%s'", fullscreen_mode.c_str()); - } - } - -#ifdef WITH_DRMKMS - // set to current mode - if (wi.surface_width == 0) - { - if (!DRMDisplay::GetCurrentMode(&wi.surface_width, &wi.surface_height, &wi.surface_refresh_rate)) - Log_ErrorPrintf("Failed to get current mode, will use default."); - } -#endif - - // This isn't great, but it's an approximation at least.. 
- if (wi.surface_width > 0) - wi.surface_scale = std::max(0.1f, static_cast(wi.surface_width) / 1280.0f); - - return wi; -} - -void VTYNoGUIPlatform::SetFullscreen(bool enabled) -{ - // already fullscreen :-) -} - -bool VTYNoGUIPlatform::RequestRenderWindowSize(s32 new_window_width, s32 new_window_height) -{ - return false; -} - -bool VTYNoGUIPlatform::OpenURL(const std::string_view& url) -{ - Log_ErrorPrintf("VTYNoGUIPlatform::OpenURL() not implemented: %.*s", static_cast(url.size()), url.data()); - return false; -} - -bool VTYNoGUIPlatform::CopyTextToClipboard(const std::string_view& text) -{ - Log_ErrorPrintf("VTYNoGUIPlatform::CopyTextToClipboard() not implemented: %.*s", static_cast(text.size()), - text.data()); - return false; -} - -void VTYNoGUIPlatform::SetPlatformWindowTitle(std::string title) -{ - Log_InfoPrintf("Window Title: %s", title.c_str()); -} - -void VTYNoGUIPlatform::RunMessageLoop() -{ - while (m_message_loop_running.load(std::memory_order_acquire)) - { - PollEvDevKeyboards(); - - { - std::unique_lock lock(m_callback_queue_mutex); - while (!m_callback_queue.empty()) - { - std::function func = std::move(m_callback_queue.front()); - m_callback_queue.pop_front(); - lock.unlock(); - func(); - lock.lock(); - } - } - - // TODO: Make this suck less. 
- std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } -} - -void VTYNoGUIPlatform::ExecuteInMessageLoop(std::function func) -{ - std::unique_lock lock(m_callback_queue_mutex); - m_callback_queue.push_back(std::move(func)); -} - -void VTYNoGUIPlatform::QuitMessageLoop() -{ - m_message_loop_running.store(false, std::memory_order_release); -} - -void VTYNoGUIPlatform::OpenEVDevFDs() -{ - for (int i = 0; i < 1000; i++) - { - TinyString path; - path.Format("/dev/input/event%d", i); - - int fd = open(path, O_RDONLY | O_NONBLOCK); - if (fd < 0) - break; - - struct libevdev* obj; - if (libevdev_new_from_fd(fd, &obj) != 0) - { - Log_ErrorPrintf("libevdev_new_from_fd(%s) failed", path.GetCharArray()); - close(fd); - continue; - } - - Log_DevPrintf("Input path: %s", path.GetCharArray()); - Log_DevPrintf("Input device name: \"%s\"", libevdev_get_name(obj)); - Log_DevPrintf("Input device ID: bus %#x vendor %#x product %#x", libevdev_get_id_bustype(obj), - libevdev_get_id_vendor(obj), libevdev_get_id_product(obj)); - if (!libevdev_has_event_code(obj, EV_KEY, KEY_SPACE)) - { - Log_DevPrintf("This device does not look like a keyboard"); - libevdev_free(obj); - close(fd); - continue; - } - - const int grab_res = libevdev_grab(obj, LIBEVDEV_GRAB); - if (grab_res != 0) - Log_WarningPrintf("Failed to grab '%s' (%s): %d", libevdev_get_name(obj), path.GetCharArray(), grab_res); - - m_evdev_keyboards.push_back({obj, fd}); - } -} - -void VTYNoGUIPlatform::CloseEVDevFDs() -{ - for (const EvDevKeyboard& kb : m_evdev_keyboards) - { - libevdev_grab(kb.obj, LIBEVDEV_UNGRAB); - libevdev_free(kb.obj); - close(kb.fd); - } - m_evdev_keyboards.clear(); -} - -void VTYNoGUIPlatform::PollEvDevKeyboards() -{ - for (const EvDevKeyboard& kb : m_evdev_keyboards) - { - struct input_event ev; - while (libevdev_next_event(kb.obj, LIBEVDEV_READ_FLAG_NORMAL, &ev) == 0) - { - // auto-repeat - // TODO: forward char to imgui - if (ev.value == 2) - continue; - - const bool pressed = (ev.value == 1); - 
NoGUIHost::ProcessPlatformKeyEvent(static_cast(ev.code), pressed); - } - } -} - -std::optional VTYNoGUIPlatform::ConvertHostKeyboardStringToCode(const std::string_view& str) -{ - std::optional converted(VTYKeyNames::GetKeyCodeForName(str)); - return converted.has_value() ? std::optional(static_cast(converted.value())) : std::nullopt; -} - -std::optional VTYNoGUIPlatform::ConvertHostKeyboardCodeToString(u32 code) -{ - const char* keyname = VTYKeyNames::GetKeyName(static_cast(code)); - return keyname ? std::optional(std::string(keyname)) : std::nullopt; -} diff --git a/src/duckstation-nogui/wayland_nogui_platform.cpp b/src/duckstation-nogui/wayland_nogui_platform.cpp index c6d4ec217..52bf96727 100644 --- a/src/duckstation-nogui/wayland_nogui_platform.cpp +++ b/src/duckstation-nogui/wayland_nogui_platform.cpp @@ -150,6 +150,11 @@ bool WaylandNoGUIPlatform::CreatePlatformWindow(std::string title) return true; } +bool WaylandNoGUIPlatform::HasPlatformWindow() const +{ + return (m_surface != nullptr); +} + void WaylandNoGUIPlatform::DestroyPlatformWindow() { m_window_info = {}; diff --git a/src/duckstation-nogui/wayland_nogui_platform.h b/src/duckstation-nogui/wayland_nogui_platform.h index 25f8b3714..2423abee5 100644 --- a/src/duckstation-nogui/wayland_nogui_platform.h +++ b/src/duckstation-nogui/wayland_nogui_platform.h @@ -29,6 +29,7 @@ public: void SetDefaultConfig(SettingsInterface& si) override; bool CreatePlatformWindow(std::string title) override; + bool HasPlatformWindow() const override; void DestroyPlatformWindow() override; std::optional GetPlatformWindowInfo() override; void SetPlatformWindowTitle(std::string title) override; diff --git a/src/duckstation-nogui/win32_nogui_platform.cpp b/src/duckstation-nogui/win32_nogui_platform.cpp index 912fbd16d..0e7d81e1a 100644 --- a/src/duckstation-nogui/win32_nogui_platform.cpp +++ b/src/duckstation-nogui/win32_nogui_platform.cpp @@ -134,6 +134,11 @@ bool Win32NoGUIPlatform::CreatePlatformWindow(std::string title) 
return true; } +bool Win32NoGUIPlatform::HasPlatformWindow() const +{ + return (m_hwnd != NULL); +} + void Win32NoGUIPlatform::DestroyPlatformWindow() { if (!m_hwnd) diff --git a/src/duckstation-nogui/win32_nogui_platform.h b/src/duckstation-nogui/win32_nogui_platform.h index d6ad40f20..7af55f920 100644 --- a/src/duckstation-nogui/win32_nogui_platform.h +++ b/src/duckstation-nogui/win32_nogui_platform.h @@ -23,6 +23,7 @@ public: void SetDefaultConfig(SettingsInterface& si) override; bool CreatePlatformWindow(std::string title) override; + bool HasPlatformWindow() const override; void DestroyPlatformWindow() override; std::optional GetPlatformWindowInfo() override; void SetPlatformWindowTitle(std::string title) override; diff --git a/src/duckstation-nogui/x11_nogui_platform.cpp b/src/duckstation-nogui/x11_nogui_platform.cpp index 2dcc270bd..ab6e456ff 100644 --- a/src/duckstation-nogui/x11_nogui_platform.cpp +++ b/src/duckstation-nogui/x11_nogui_platform.cpp @@ -99,6 +99,11 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title) return true; } +bool X11NoGUIPlatform::HasPlatformWindow() const +{ + return m_window != 0; +} + void X11NoGUIPlatform::DestroyPlatformWindow() { m_window_info = {}; diff --git a/src/duckstation-nogui/x11_nogui_platform.h b/src/duckstation-nogui/x11_nogui_platform.h index 3a2583c9b..395353e7f 100644 --- a/src/duckstation-nogui/x11_nogui_platform.h +++ b/src/duckstation-nogui/x11_nogui_platform.h @@ -48,6 +48,7 @@ public: void SetDefaultConfig(SettingsInterface& si) override; bool CreatePlatformWindow(std::string title) override; + bool HasPlatformWindow() const override; void DestroyPlatformWindow() override; std::optional GetPlatformWindowInfo() override; void SetPlatformWindowTitle(std::string title) override; diff --git a/src/duckstation-qt/CMakeLists.txt b/src/duckstation-qt/CMakeLists.txt index 33478dc2b..213a138ad 100644 --- a/src/duckstation-qt/CMakeLists.txt +++ b/src/duckstation-qt/CMakeLists.txt @@ -198,7 +198,7 @@ 
if(WIN32) ) endif() -if(APPLE) +if(APPLE AND NOT CMAKE_GENERATOR MATCHES "Xcode") set(BUNDLE_PATH ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DuckStation.app) # Ask for an application bundle. diff --git a/src/duckstation-qt/advancedsettingswidget.cpp b/src/duckstation-qt/advancedsettingswidget.cpp index fe6377b39..67e032cb1 100644 --- a/src/duckstation-qt/advancedsettingswidget.cpp +++ b/src/duckstation-qt/advancedsettingswidget.cpp @@ -333,6 +333,8 @@ void AdvancedSettingsWidget::addTweakOptions() Settings::DEFAULT_GPU_MAX_RUN_AHEAD); addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Use Debug Host GPU Device"), "GPU", "UseDebugDevice", false); + addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Disable Shader Cache"), "GPU", "DisableShaderCache", + false); addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Stretch Display Vertically"), "Display", "StretchVertically", false); @@ -388,6 +390,7 @@ void AdvancedSettingsWidget::onResetToDefaultClicked() setIntRangeTweakOption(m_ui.tweakOptionTable, i++, static_cast(Settings::DEFAULT_GPU_MAX_RUN_AHEAD)); // GPU max run-ahead setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Use debug host GPU device + setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Disable Shader Cache setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Stretch Display Vertically setBooleanTweakOption(m_ui.tweakOptionTable, i++, true); // Increase Timer Resolution setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Allow booting without SBI file diff --git a/src/duckstation-qt/displaysettingswidget.cpp b/src/duckstation-qt/displaysettingswidget.cpp index 95f1af139..c9a0c0797 100644 --- a/src/duckstation-qt/displaysettingswidget.cpp +++ b/src/duckstation-qt/displaysettingswidget.cpp @@ -12,11 +12,11 @@ // For enumerating adapters. 
#ifdef _WIN32 -#include "util/d3d11_host_display.h" -#include "util/d3d12_host_display.h" +#include "util/d3d11_device.h" +#include "util/d3d12_device.h" #endif #ifdef WITH_VULKAN -#include "util/vulkan_host_display.h" +#include "util/vulkan_device.h" #endif DisplaySettingsWidget::DisplaySettingsWidget(SettingsDialog* dialog, QWidget* parent) @@ -187,23 +187,28 @@ void DisplaySettingsWidget::setupAdditionalUi() void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() { - HostDisplay::AdapterAndModeList aml; + GPUDevice::AdapterAndModeList aml; bool thread_supported = false; bool threaded_presentation_supported = false; switch (static_cast(m_ui.renderer->currentIndex())) { #ifdef _WIN32 case GPURenderer::HardwareD3D11: - aml = D3D11HostDisplay::StaticGetAdapterAndModeList(); + aml = D3D11Device::StaticGetAdapterAndModeList(); break; case GPURenderer::HardwareD3D12: - aml = D3D12HostDisplay::StaticGetAdapterAndModeList(); + aml = D3D12Device::StaticGetAdapterAndModeList(); + break; +#endif +#ifdef __APPLE__ + case GPURenderer::HardwareMetal: + aml = GPUDevice::WrapGetMetalAdapterAndModeList(); break; #endif #ifdef WITH_VULKAN case GPURenderer::HardwareVulkan: - aml = VulkanHostDisplay::StaticGetAdapterAndModeList(nullptr); + aml = VulkanDevice::StaticGetAdapterAndModeList(); threaded_presentation_supported = true; break; #endif diff --git a/src/duckstation-qt/displaywidget.h b/src/duckstation-qt/displaywidget.h index 2da292159..a99263495 100644 --- a/src/duckstation-qt/displaywidget.h +++ b/src/duckstation-qt/displaywidget.h @@ -2,8 +2,11 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + +#include "util/window_info.h" + #include "common/types.h" -#include "common/window_info.h" + #include #include #include diff --git a/src/duckstation-qt/mainwindow.cpp b/src/duckstation-qt/mainwindow.cpp index d97254d4f..3a55fb61e 100644 --- a/src/duckstation-qt/mainwindow.cpp +++ b/src/duckstation-qt/mainwindow.cpp @@ -24,7 +24,7 @@ #include 
"core/system.h" #include "util/cd_image.h" -#include "util/host_display.h" +#include "util/gpu_device.h" #include "util/platform_misc.h" #include "common/assert.h" @@ -218,77 +218,33 @@ bool MainWindow::nativeEvent(const QByteArray& eventType, void* message, qintptr #endif -bool MainWindow::createDisplay(bool fullscreen, bool render_to_main) +std::optional MainWindow::acquireRenderWindow(bool recreate_window, bool fullscreen, bool render_to_main, + bool surfaceless, bool use_main_window_pos) { - Log_DevPrintf("createDisplay(%u, %u)", static_cast(fullscreen), static_cast(render_to_main)); - - const std::string fullscreen_mode(Host::GetBaseStringSettingValue("GPU", "FullscreenMode", "")); - const bool is_exclusive_fullscreen = (fullscreen && !fullscreen_mode.empty() && g_host_display->SupportsFullscreen()); - - createDisplayWidget(fullscreen, render_to_main, is_exclusive_fullscreen); - - std::optional wi = m_display_widget->getWindowInfo(); - if (!wi.has_value()) - { - QMessageBox::critical(this, tr("Error"), tr("Failed to get window info from widget")); - destroyDisplayWidget(true); - return false; - } - - g_emu_thread->connectDisplaySignals(m_display_widget); - - if (!g_host_display->CreateDevice(wi.value(), System::ShouldUseVSync())) - { - QMessageBox::critical(this, tr("Error"), tr("Failed to create host display device context.")); - destroyDisplayWidget(true); - return false; - } - - m_display_created = true; - - if (is_exclusive_fullscreen) - setDisplayFullscreen(fullscreen_mode); - - updateWindowTitle(); - updateWindowState(); - - m_ui.actionStartFullscreenUI->setEnabled(false); - m_ui.actionStartFullscreenUI2->setEnabled(false); - - updateDisplayWidgetCursor(); - updateDisplayRelatedActions(true, render_to_main, fullscreen); - - m_display_widget->setFocus(); - - g_host_display->DoneCurrent(); - return true; -} - -bool MainWindow::updateDisplay(bool fullscreen, bool render_to_main, bool surfaceless) -{ - Log_DevPrintf("updateDisplay() fullscreen=%s 
render_to_main=%s surfaceless=%s", fullscreen ? "true" : "false", - render_to_main ? "true" : "false", surfaceless ? "true" : "false"); + Log_DevPrintf( + "acquireRenderWindow() recreate=%s fullscreen=%s render_to_main=%s surfaceless=%s use_main_window_pos=%s", + recreate_window ? "true" : "false", fullscreen ? "true" : "false", render_to_main ? "true" : "false", + surfaceless ? "true" : "false", use_main_window_pos ? "true" : "false"); QWidget* container = m_display_container ? static_cast(m_display_container) : static_cast(m_display_widget); const bool is_fullscreen = isRenderingFullscreen(); const bool is_rendering_to_main = isRenderingToMain(); - const std::string fullscreen_mode(Host::GetBaseStringSettingValue("GPU", "FullscreenMode", "")); - const bool is_exclusive_fullscreen = (fullscreen && !fullscreen_mode.empty() && g_host_display->SupportsFullscreen()); const bool changing_surfaceless = (!m_display_widget != surfaceless); - if (fullscreen == is_fullscreen && is_rendering_to_main == render_to_main && !changing_surfaceless) - return true; + if (m_display_created && !recreate_window && fullscreen == is_fullscreen && is_rendering_to_main == render_to_main && + !changing_surfaceless) + { + return m_display_widget ? m_display_widget->getWindowInfo() : WindowInfo(); + } // Skip recreating the surface if we're just transitioning between fullscreen and windowed with render-to-main off. // .. except on Wayland, where everything tends to break if you don't recreate. 
const bool has_container = (m_display_container != nullptr); const bool needs_container = DisplayContainer::isNeeded(fullscreen, render_to_main); - if (!is_rendering_to_main && !render_to_main && !is_exclusive_fullscreen && has_container == needs_container && - !needs_container && !changing_surfaceless) + if (m_display_created && !recreate_window && !is_rendering_to_main && !render_to_main && + has_container == needs_container && !needs_container && !changing_surfaceless) { Log_DevPrintf("Toggling to %s without recreating surface", (fullscreen ? "fullscreen" : "windowed")); - if (g_host_display->IsFullscreen()) - g_host_display->SetFullscreen(false, 0, 0, 0.0f); // since we don't destroy the display widget, we need to save it here if (!is_fullscreen && !is_rendering_to_main) @@ -306,53 +262,48 @@ bool MainWindow::updateDisplay(bool fullscreen, bool render_to_main, bool surfac updateDisplayWidgetCursor(); m_display_widget->setFocus(); + updateWindowState(); QCoreApplication::processEvents(QEventLoop::ExcludeUserInputEvents); - return true; + return m_display_widget->getWindowInfo(); } - g_host_display->DestroySurface(); - - destroyDisplayWidget(surfaceless || fullscreen); + destroyDisplayWidget(surfaceless); + m_display_created = true; // if we're going to surfaceless, we're done here if (surfaceless) - { - updateWindowState(); - updateDisplayRelatedActions(false, render_to_main, fullscreen); - return true; - } + return WindowInfo(); - createDisplayWidget(fullscreen, render_to_main, is_exclusive_fullscreen); + createDisplayWidget(fullscreen, render_to_main, use_main_window_pos); + + // we need the surface visible.. 
this might be able to be replaced with something else + QCoreApplication::processEvents(QEventLoop::ExcludeUserInputEvents); std::optional wi = m_display_widget->getWindowInfo(); if (!wi.has_value()) { - QMessageBox::critical(this, tr("Error"), tr("Failed to get new window info from widget")); + QMessageBox::critical(this, tr("Error"), tr("Failed to get window info from widget")); destroyDisplayWidget(true); - return false; + return std::nullopt; } g_emu_thread->connectDisplaySignals(m_display_widget); - if (!g_host_display->ChangeWindow(wi.value())) - Panic("Failed to recreate surface on new widget."); - - if (is_exclusive_fullscreen) - setDisplayFullscreen(fullscreen_mode); - updateWindowTitle(); updateWindowState(); + + m_ui.actionStartFullscreenUI->setEnabled(false); + m_ui.actionStartFullscreenUI2->setEnabled(false); + updateDisplayWidgetCursor(); updateDisplayRelatedActions(true, render_to_main, fullscreen); m_display_widget->setFocus(); - QSignalBlocker blocker(m_ui.actionFullscreen); - m_ui.actionFullscreen->setChecked(fullscreen); - return true; + return wi; } -void MainWindow::createDisplayWidget(bool fullscreen, bool render_to_main, bool is_exclusive_fullscreen) +void MainWindow::createDisplayWidget(bool fullscreen, bool render_to_main, bool use_main_window_pos) { // If we're rendering to main and were hidden (e.g. coming back from fullscreen), // make sure we're visible before trying to add ourselves. Otherwise Wayland breaks. @@ -388,20 +339,21 @@ void MainWindow::createDisplayWidget(bool fullscreen, bool render_to_main, bool // and positioning has no effect anyway. 
if (!s_use_central_widget) { - if (isVisible()) + if (isVisible() && g_emu_thread->shouldRenderToMain()) container->move(pos()); else restoreDisplayWindowGeometryFromConfig(); } - if (!is_exclusive_fullscreen) - container->showFullScreen(); - else - container->showNormal(); + container->showFullScreen(); } else if (!render_to_main) { - restoreDisplayWindowGeometryFromConfig(); + // See lameland comment above. + if (use_main_window_pos && !s_use_central_widget) + container->move(pos()); + else + restoreDisplayWindowGeometryFromConfig(); container->showNormal(); } else if (s_use_central_widget) @@ -420,27 +372,13 @@ void MainWindow::createDisplayWidget(bool fullscreen, bool render_to_main, bool m_ui.mainContainer->setCurrentIndex(1); } + updateDisplayRelatedActions(true, render_to_main, fullscreen); + // We need the surface visible. QGuiApplication::sync(); } -void MainWindow::setDisplayFullscreen(const std::string& fullscreen_mode) -{ - u32 width, height; - float refresh_rate; - bool result = false; - - if (HostDisplay::ParseFullscreenMode(fullscreen_mode, &width, &height, &refresh_rate)) - { - result = g_host_display->SetFullscreen(true, width, height, refresh_rate); - if (result) - Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Acquired exclusive fullscreen."), 10.0f); - else - Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Failed to acquire exclusive fullscreen."), 10.0f); - } -} - -void MainWindow::displaySizeRequested(qint32 width, qint32 height) +void MainWindow::displayResizeRequested(qint32 width, qint32 height) { if (!m_display_widget) return; @@ -462,7 +400,7 @@ void MainWindow::displaySizeRequested(qint32 width, qint32 height) QtUtils::ResizePotentiallyFixedSizeWindow(this, width, height + extra_height); } -void MainWindow::destroyDisplay() +void MainWindow::releaseRenderWindow() { // Now we can safely destroy the display window. 
destroyDisplayWidget(true); @@ -470,6 +408,8 @@ void MainWindow::destroyDisplay() updateDisplayRelatedActions(false, false, false); + m_ui.actionViewSystemDisplay->setEnabled(false); + m_ui.actionFullscreen->setEnabled(false); m_ui.actionStartFullscreenUI->setEnabled(true); m_ui.actionStartFullscreenUI2->setEnabled(true); } @@ -532,7 +472,7 @@ void MainWindow::updateDisplayWidgetCursor() void MainWindow::updateDisplayRelatedActions(bool has_surface, bool render_to_main, bool fullscreen) { // rendering to main, or switched to gamelist/grid - m_ui.actionViewSystemDisplay->setEnabled((has_surface && render_to_main) || (!has_surface && g_host_display)); + m_ui.actionViewSystemDisplay->setEnabled((has_surface && render_to_main) || (!has_surface && g_gpu_device)); m_ui.menuWindowSize->setEnabled(has_surface && !fullscreen); m_ui.actionFullscreen->setEnabled(has_surface); @@ -1842,10 +1782,10 @@ bool MainWindow::isShowingGameList() const bool MainWindow::isRenderingFullscreen() const { - if (!g_host_display || !m_display_widget) + if (!g_gpu_device || !m_display_widget) return false; - return getDisplayContainer()->isFullScreen() || g_host_display->IsFullscreen(); + return getDisplayContainer()->isFullScreen(); } bool MainWindow::isRenderingToMain() const @@ -1999,12 +1939,11 @@ void MainWindow::connectSignals() Qt::QueuedConnection); connect(g_emu_thread, &EmuThread::errorReported, this, &MainWindow::reportError, Qt::BlockingQueuedConnection); connect(g_emu_thread, &EmuThread::messageConfirmed, this, &MainWindow::confirmMessage, Qt::BlockingQueuedConnection); - connect(g_emu_thread, &EmuThread::createDisplayRequested, this, &MainWindow::createDisplay, + connect(g_emu_thread, &EmuThread::onAcquireRenderWindowRequested, this, &MainWindow::acquireRenderWindow, Qt::BlockingQueuedConnection); - connect(g_emu_thread, &EmuThread::destroyDisplayRequested, this, &MainWindow::destroyDisplay); - connect(g_emu_thread, &EmuThread::updateDisplayRequested, this, 
&MainWindow::updateDisplay, + connect(g_emu_thread, &EmuThread::onReleaseRenderWindowRequested, this, &MainWindow::releaseRenderWindow); + connect(g_emu_thread, &EmuThread::onResizeRenderWindowRequested, this, &MainWindow::displayResizeRequested, Qt::BlockingQueuedConnection); - connect(g_emu_thread, &EmuThread::displaySizeRequested, this, &MainWindow::displaySizeRequested); connect(g_emu_thread, &EmuThread::focusDisplayWidgetRequested, this, &MainWindow::focusDisplayWidget); connect(g_emu_thread, &EmuThread::systemStarting, this, &MainWindow::onSystemStarting); connect(g_emu_thread, &EmuThread::systemStarted, this, &MainWindow::onSystemStarted); @@ -2415,6 +2354,7 @@ void MainWindow::closeEvent(QCloseEvent* event) if (!s_system_valid) { saveGeometryToConfig(); + g_emu_thread->stopFullscreenUI(); QMainWindow::closeEvent(event); return; } diff --git a/src/duckstation-qt/mainwindow.h b/src/duckstation-qt/mainwindow.h index 363adf490..c65efcaf3 100644 --- a/src/duckstation-qt/mainwindow.h +++ b/src/duckstation-qt/mainwindow.h @@ -2,6 +2,16 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + +#include "controllersettingsdialog.h" +#include "displaywidget.h" +#include "settingsdialog.h" +#include "ui_mainwindow.h" + +#include "core/types.h" + +#include "util/window_info.h" + #include #include #include @@ -10,13 +20,6 @@ #include #include -#include "controllersettingsdialog.h" -#include "common/window_info.h" -#include "core/types.h" -#include "displaywidget.h" -#include "settingsdialog.h" -#include "ui_mainwindow.h" - class QLabel; class QThread; class QProgressBar; @@ -29,7 +32,7 @@ class CheatManagerDialog; class DebuggerWindow; class MainWindow; -class HostDisplay; +class GPUDevice; namespace GameList { struct Entry; } @@ -110,10 +113,11 @@ public Q_SLOTS: private Q_SLOTS: void reportError(const QString& title, const QString& message); bool confirmMessage(const QString& title, const QString& message); - bool createDisplay(bool fullscreen, bool 
render_to_main); - bool updateDisplay(bool fullscreen, bool render_to_main, bool surfaceless); - void displaySizeRequested(qint32 width, qint32 height); - void destroyDisplay(); + + std::optional acquireRenderWindow(bool recreate_window, bool fullscreen, bool render_to_main, + bool surfaceless, bool use_main_window_pos); + void displayResizeRequested(qint32 width, qint32 height); + void releaseRenderWindow(); void focusDisplayWidget(); void onMouseModeRequested(bool relative_mode, bool hide_cursor); @@ -208,11 +212,10 @@ private: void restoreGeometryFromConfig(); void saveDisplayWindowGeometryToConfig(); void restoreDisplayWindowGeometryFromConfig(); - void createDisplayWidget(bool fullscreen, bool render_to_main, bool is_exclusive_fullscreen); + void createDisplayWidget(bool fullscreen, bool render_to_main, bool use_main_window_pos); void destroyDisplayWidget(bool show_game_list); void updateDisplayWidgetCursor(); void updateDisplayRelatedActions(bool has_surface, bool render_to_main, bool fullscreen); - void setDisplayFullscreen(const std::string& fullscreen_mode); SettingsDialog* getSettingsDialog(); void doSettings(const char* category = nullptr); diff --git a/src/duckstation-qt/postprocessingchainconfigwidget.cpp b/src/duckstation-qt/postprocessingchainconfigwidget.cpp index 41fbf9602..51e559eba 100644 --- a/src/duckstation-qt/postprocessingchainconfigwidget.cpp +++ b/src/duckstation-qt/postprocessingchainconfigwidget.cpp @@ -4,7 +4,9 @@ #include "postprocessingchainconfigwidget.h" #include "postprocessingshaderconfigwidget.h" #include "qthost.h" + #include "util/postprocessing_chain.h" + #include #include #include @@ -71,9 +73,9 @@ void PostProcessingChainConfigWidget::updateList() for (u32 i = 0; i < m_chain.GetStageCount(); i++) { - const FrontendCommon::PostProcessingShader& shader = m_chain.GetShaderStage(i); + const PostProcessingShader* shader = m_chain.GetShaderStage(i); - QListWidgetItem* item = new 
QListWidgetItem(QString::fromStdString(shader.GetName()), m_ui.shaders); + QListWidgetItem* item = new QListWidgetItem(QString::fromStdString(shader->GetName()), m_ui.shaders); item->setData(Qt::UserRole, QVariant(i)); } @@ -94,7 +96,7 @@ void PostProcessingChainConfigWidget::updateButtonStates(std::optional inde m_ui.clear->setEnabled(!m_chain.IsEmpty()); // m_ui.reload->setEnabled(!m_chain.IsEmpty()); m_ui.shaderSettings->setEnabled(index.has_value() && (index.value() < m_chain.GetStageCount()) && - m_chain.GetShaderStage(index.value()).HasOptions()); + m_chain.GetShaderStage(index.value())->HasOptions()); if (index.has_value()) { @@ -112,7 +114,7 @@ void PostProcessingChainConfigWidget::onAddButtonClicked() { QMenu menu; - const std::vector shaders(FrontendCommon::PostProcessingChain::GetAvailableShaderNames()); + const std::vector shaders(PostProcessingChain::GetAvailableShaderNames()); if (shaders.empty()) { menu.addAction(tr("No Shaders Available"))->setEnabled(false); @@ -198,7 +200,7 @@ void PostProcessingChainConfigWidget::onShaderConfigButtonClicked() std::optional index = getSelectedIndex(); if (index.has_value() && index.value() < m_chain.GetStageCount()) { - PostProcessingShaderConfigDialog shader_config(this, &m_chain.GetShaderStage(index.value())); + PostProcessingShaderConfigDialog shader_config(this, m_chain.GetShaderStage(index.value())); connect(&shader_config, &PostProcessingShaderConfigDialog::configChanged, [this]() { configChanged(); }); shader_config.exec(); } diff --git a/src/duckstation-qt/postprocessingchainconfigwidget.h b/src/duckstation-qt/postprocessingchainconfigwidget.h index f94138a19..1b90984f5 100644 --- a/src/duckstation-qt/postprocessingchainconfigwidget.h +++ b/src/duckstation-qt/postprocessingchainconfigwidget.h @@ -2,12 +2,15 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "common/types.h" #include "ui_postprocessingchainconfigwidget.h" + #include "util/postprocessing_chain.h" + +#include 
"common/types.h" + #include -#include #include +#include #include namespace FrontendCommon { @@ -22,7 +25,7 @@ public: PostProcessingChainConfigWidget(QWidget* parent); ~PostProcessingChainConfigWidget(); - ALWAYS_INLINE FrontendCommon::PostProcessingChain& getChain() { return m_chain; } + ALWAYS_INLINE PostProcessingChain& getChain() { return m_chain; } bool setConfigString(const std::string_view& config_string); void setOptionsButtonVisible(bool visible); @@ -51,5 +54,5 @@ private: Ui::PostProcessingChainConfigWidget m_ui; - FrontendCommon::PostProcessingChain m_chain; + PostProcessingChain m_chain; }; diff --git a/src/duckstation-qt/postprocessingsettingswidget.cpp b/src/duckstation-qt/postprocessingsettingswidget.cpp index 4e011d70d..7c75e6346 100644 --- a/src/duckstation-qt/postprocessingsettingswidget.cpp +++ b/src/duckstation-qt/postprocessingsettingswidget.cpp @@ -71,11 +71,11 @@ void PostProcessingSettingsWidget::updateShaderConfigPanel(s32 index) if (index < 0) return; - FrontendCommon::PostProcessingShader& shader = m_ui.widget->getChain().GetShaderStage(static_cast(index)); - if (!shader.HasOptions()) + PostProcessingShader* shader = m_ui.widget->getChain().GetShaderStage(static_cast(index)); + if (!shader->HasOptions()) return; - m_shader_config = new PostProcessingShaderConfigWidget(m_ui.scrollArea, &shader); + m_shader_config = new PostProcessingShaderConfigWidget(m_ui.scrollArea, shader); connect(m_shader_config, &PostProcessingShaderConfigWidget::configChanged, [this]() { onConfigChanged(m_ui.widget->getChain().GetConfigString()); }); m_ui.scrollArea->setWidget(m_shader_config); diff --git a/src/duckstation-qt/postprocessingshaderconfigwidget.cpp b/src/duckstation-qt/postprocessingshaderconfigwidget.cpp index 1d281698c..435fc7e17 100644 --- a/src/duckstation-qt/postprocessingshaderconfigwidget.cpp +++ b/src/duckstation-qt/postprocessingshaderconfigwidget.cpp @@ -8,10 +8,8 @@ #include #include -using FrontendCommon::PostProcessingShader; - 
PostProcessingShaderConfigWidget::PostProcessingShaderConfigWidget(QWidget* parent, - FrontendCommon::PostProcessingShader* shader) + PostProcessingShader* shader) : QWidget(parent), m_shader(shader) { createUi(); @@ -146,7 +144,7 @@ void PostProcessingShaderConfigWidget::onResetToDefaultsClicked() } PostProcessingShaderConfigDialog::PostProcessingShaderConfigDialog(QWidget* parent, - FrontendCommon::PostProcessingShader* shader) + PostProcessingShader* shader) : QDialog(parent) { setWindowTitle(tr("%1 Shader Options").arg(QString::fromStdString(shader->GetName()))); diff --git a/src/duckstation-qt/postprocessingshaderconfigwidget.h b/src/duckstation-qt/postprocessingshaderconfigwidget.h index 7cc4a540a..c1cdf06c0 100644 --- a/src/duckstation-qt/postprocessingshaderconfigwidget.h +++ b/src/duckstation-qt/postprocessingshaderconfigwidget.h @@ -2,7 +2,9 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + #include "util/postprocessing_shader.h" + #include #include @@ -13,7 +15,7 @@ class PostProcessingShaderConfigWidget : public QWidget Q_OBJECT public: - PostProcessingShaderConfigWidget(QWidget* parent, FrontendCommon::PostProcessingShader* shader); + PostProcessingShaderConfigWidget(QWidget* parent, PostProcessingShader* shader); ~PostProcessingShaderConfigWidget(); QGridLayout* getLayout() { return m_layout; } @@ -28,7 +30,7 @@ private Q_SLOTS: protected: void createUi(); - FrontendCommon::PostProcessingShader* m_shader; + PostProcessingShader* m_shader; QGridLayout* m_layout; }; @@ -37,7 +39,7 @@ class PostProcessingShaderConfigDialog : public QDialog Q_OBJECT public: - PostProcessingShaderConfigDialog(QWidget* parent, FrontendCommon::PostProcessingShader* shader); + PostProcessingShaderConfigDialog(QWidget* parent, PostProcessingShader* shader); ~PostProcessingShaderConfigDialog(); Q_SIGNALS: diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index 188325140..1d2d79752 100644 --- a/src/duckstation-qt/qthost.cpp +++ 
b/src/duckstation-qt/qthost.cpp @@ -27,7 +27,6 @@ #include "common/log.h" #include "common/path.h" #include "common/string_util.h" -#include "common/window_info.h" #include "util/audio_stream.h" #include "util/imgui_manager.h" @@ -107,7 +106,9 @@ static bool s_start_fullscreen_ui_fullscreen = false; EmuThread* g_emu_thread; GDBServer* g_gdb_server; -EmuThread::EmuThread(QThread* ui_thread) : QThread(), m_ui_thread(ui_thread) {} +EmuThread::EmuThread(QThread* ui_thread) : QThread(), m_ui_thread(ui_thread) +{ +} EmuThread::~EmuThread() = default; @@ -336,24 +337,31 @@ void EmuThread::setInitialState(std::optional override_fullscreen) m_is_surfaceless = false; } +void EmuThread::checkForSettingsChanges(const Settings& old_settings) +{ + if (g_main_window) + { + QMetaObject::invokeMethod(g_main_window, &MainWindow::checkForSettingChanges, Qt::QueuedConnection); + updatePerformanceCounters(); + } + + if (g_gpu_device) + { + const bool render_to_main = shouldRenderToMain(); + if (m_is_rendering_to_main != render_to_main) + { + m_is_rendering_to_main = render_to_main; + g_gpu_device->UpdateWindow(); + } + } +} + void Host::CheckForSettingsChanges(const Settings& old_settings) { CommonHost::CheckForSettingsChanges(old_settings); g_emu_thread->checkForSettingsChanges(old_settings); } -void EmuThread::checkForSettingsChanges(const Settings& old_settings) -{ - const bool render_to_main = shouldRenderToMain(); - if (m_is_rendering_to_main != render_to_main) - { - m_is_rendering_to_main = render_to_main; - updateDisplayState(); - } - - QMetaObject::invokeMethod(g_main_window, &MainWindow::checkForSettingChanges, Qt::QueuedConnection); -} - void EmuThread::setDefaultSettings(bool system /* = true */, bool controller /* = true */) { if (isOnThread()) @@ -399,7 +407,7 @@ void Host::RequestResizeHostDisplay(s32 new_window_width, s32 new_window_height) if (g_emu_thread->isFullscreen()) return; - emit g_emu_thread->displaySizeRequested(new_window_width, new_window_height); + emit 
g_emu_thread->onResizeRenderWindowRequested(new_window_width, new_window_height); } void EmuThread::applySettings(bool display_osd_messages /* = false */) @@ -456,8 +464,10 @@ void EmuThread::startFullscreenUI() setInitialState(s_start_fullscreen_ui_fullscreen ? std::optional(true) : std::optional()); m_run_fullscreen_ui = true; - if (!acquireHostDisplay(Settings::GetRenderAPIForRenderer(g_settings.gpu_renderer))) + if (!Host::CreateGPUDevice(Settings::GetRenderAPIForRenderer(g_settings.gpu_renderer)) || !FullscreenUI::Initialize()) { + Host::ReleaseGPUDevice(); + Host::ReleaseRenderWindow(); m_run_fullscreen_ui = false; return; } @@ -475,7 +485,7 @@ void EmuThread::stopFullscreenUI() QMetaObject::invokeMethod(this, &EmuThread::stopFullscreenUI, Qt::QueuedConnection); // wait until the host display is gone - while (g_host_display) + while (g_gpu_device) QApplication::processEvents(QEventLoop::ExcludeUserInputEvents, 1); return; @@ -484,11 +494,12 @@ void EmuThread::stopFullscreenUI() if (System::IsValid()) shutdownSystem(); - if (!g_host_display) + if (!g_gpu_device) return; m_run_fullscreen_ui = false; - releaseHostDisplay(); + Host::ReleaseGPUDevice(); + Host::ReleaseRenderWindow(); } void EmuThread::bootSystem(std::shared_ptr params) @@ -506,7 +517,7 @@ void EmuThread::bootSystem(std::shared_ptr params) return; // force a frame to be drawn to repaint the window - renderDisplay(false); + Host::InvalidateDisplay(); } void EmuThread::bootOrLoadState(std::string path) @@ -568,8 +579,8 @@ void EmuThread::onDisplayWindowMouseMoveEvent(bool relative, float x, float y) DebugAssert(isOnThread()); if (!relative) { - if (g_host_display) - g_host_display->SetMousePosition(static_cast(x), static_cast(y)); + if (g_gpu_device) + g_gpu_device->SetMousePosition(static_cast(x), static_cast(y)); InputManager::UpdatePointerAbsolutePosition(0, x, y); ImGuiManager::UpdateMousePosition(x, y); @@ -581,11 +592,11 @@ void EmuThread::onDisplayWindowMouseMoveEvent(bool relative, float x, 
float y) if (y != 0.0f) InputManager::UpdatePointerRelativeDelta(0, InputPointerAxis::Y, y); - if (g_host_display) + if (g_gpu_device) { - const float abs_x = static_cast(g_host_display->GetMousePositionX()) + x; - const float abs_y = static_cast(g_host_display->GetMousePositionY()) + y; - g_host_display->SetMousePosition(static_cast(abs_x), static_cast(abs_y)); + const float abs_x = static_cast(g_gpu_device->GetMousePositionX()) + x; + const float abs_y = static_cast(g_gpu_device->GetMousePositionY()) + y; + g_gpu_device->SetMousePosition(static_cast(abs_x), static_cast(abs_y)); ImGuiManager::UpdateMousePosition(abs_x, abs_y); } } @@ -612,33 +623,9 @@ void EmuThread::onDisplayWindowMouseWheelEvent(const QPoint& delta_angle) InputManager::UpdatePointerRelativeDelta(0, InputPointerAxis::WheelY, dy); } -void EmuThread::onDisplayWindowResized(int width, int height) +void EmuThread::onDisplayWindowResized(int width, int height, float scale) { - // this can be null if it was destroyed and the main thread is late catching up - if (!g_host_display) - return; - - Log_DevPrintf("Display window resized to %dx%d", width, height); - g_host_display->ResizeWindow(width, height); - ImGuiManager::WindowResized(); - System::HostDisplayResized(); - - // re-render the display, since otherwise it will be out of date and stretched if paused - if (System::IsValid()) - { - if (m_is_exclusive_fullscreen && !g_host_display->IsFullscreen()) - { - // we lost exclusive fullscreen, switch to borderless - Host::AddOSDMessage(TRANSLATE_STR("OSDMessage", "Lost exclusive fullscreen."), 10.0f); - m_is_exclusive_fullscreen = false; - m_is_fullscreen = false; - m_lost_exclusive_fullscreen = true; - } - - // force redraw if we're paused - if (!System::IsRunning() && !FullscreenUI::HasActiveWindow()) - renderDisplay(false); - } + Host::ResizeDisplayWindow(width, height, scale); } void EmuThread::redrawDisplayWindow() @@ -649,10 +636,10 @@ void EmuThread::redrawDisplayWindow() return; } - if 
(!g_host_display || System::IsShutdown()) + if (!g_gpu_device || System::IsShutdown()) return; - renderDisplay(false); + Host::RenderDisplay(false); } void EmuThread::toggleFullscreen() @@ -663,22 +650,24 @@ void EmuThread::toggleFullscreen() return; } - setFullscreen(!m_is_fullscreen); + setFullscreen(!m_is_fullscreen, true); } -void EmuThread::setFullscreen(bool fullscreen) +void EmuThread::setFullscreen(bool fullscreen, bool allow_render_to_main) { if (!isOnThread()) { - QMetaObject::invokeMethod(this, "setFullscreen", Qt::QueuedConnection, Q_ARG(bool, fullscreen)); + QMetaObject::invokeMethod(this, "setFullscreen", Qt::QueuedConnection, Q_ARG(bool, fullscreen), + Q_ARG(bool, allow_render_to_main)); return; } - if (!g_host_display || m_is_fullscreen == fullscreen) + if (!g_gpu_device || m_is_fullscreen == fullscreen) return; m_is_fullscreen = fullscreen; - updateDisplayState(); + m_is_rendering_to_main = allow_render_to_main && shouldRenderToMain(); + Host::UpdateDisplayWindow(); } bool Host::IsFullscreen() @@ -688,7 +677,7 @@ bool Host::IsFullscreen() void Host::SetFullscreen(bool enabled) { - g_emu_thread->setFullscreen(enabled); + g_emu_thread->setFullscreen(enabled, true); } void EmuThread::setSurfaceless(bool surfaceless) @@ -699,11 +688,11 @@ void EmuThread::setSurfaceless(bool surfaceless) return; } - if (!g_host_display || m_is_surfaceless == surfaceless) + if (!g_gpu_device || m_is_surfaceless == surfaceless) return; m_is_surfaceless = surfaceless; - updateDisplayState(); + Host::UpdateDisplayWindow(); } void EmuThread::requestDisplaySize(float scale) @@ -720,52 +709,25 @@ void EmuThread::requestDisplaySize(float scale) System::RequestDisplaySize(scale); } -bool EmuThread::acquireHostDisplay(RenderAPI api) +std::optional EmuThread::acquireRenderWindow(bool recreate_window) { - if (g_host_display) - { - if (g_host_display->GetRenderAPI() == api) - { - // current is fine - return true; - } + DebugAssert(g_gpu_device); + u32 fs_width, fs_height; + float 
fs_refresh_rate; + m_is_exclusive_fullscreen = (m_is_fullscreen && g_gpu_device->SupportsExclusiveFullscreen() && + GPUDevice::GetRequestedExclusiveFullscreenMode(&fs_width, &fs_height, &fs_refresh_rate)); - // otherwise we need to switch - releaseHostDisplay(); - } + const bool window_fullscreen = m_is_fullscreen && !m_is_exclusive_fullscreen; + const bool render_to_main = !m_is_exclusive_fullscreen && !window_fullscreen && m_is_rendering_to_main; + const bool use_main_window_pos = m_is_exclusive_fullscreen && shouldRenderToMain(); - g_host_display = Host::CreateDisplayForAPI(api); - if (!g_host_display) - return false; + return emit onAcquireRenderWindowRequested(recreate_window, window_fullscreen, render_to_main, m_is_surfaceless, + use_main_window_pos); +} - if (!createDisplayRequested(m_is_fullscreen, m_is_rendering_to_main)) - { - emit destroyDisplayRequested(); - g_host_display.reset(); - return false; - } - - if (!g_host_display->MakeCurrent() || !g_host_display->SetupDevice() || !ImGuiManager::Initialize() || - !CommonHost::CreateHostDisplayResources()) - { - ImGuiManager::Shutdown(); - CommonHost::ReleaseHostDisplayResources(); - g_host_display.reset(); - emit destroyDisplayRequested(); - return false; - } - - m_is_exclusive_fullscreen = g_host_display->IsFullscreen(); - - if (m_run_fullscreen_ui && !FullscreenUI::Initialize()) - { - Log_ErrorPrint("Failed to initialize fullscreen UI"); - releaseHostDisplay(); - m_run_fullscreen_ui = false; - return false; - } - - return true; +void EmuThread::releaseRenderWindow() +{ + emit onReleaseRenderWindowRequested(); } void EmuThread::connectDisplaySignals(DisplayWidget* widget) @@ -781,46 +743,6 @@ void EmuThread::connectDisplaySignals(DisplayWidget* widget) connect(widget, &DisplayWidget::windowMouseWheelEvent, this, &EmuThread::onDisplayWindowMouseWheelEvent); } -void EmuThread::updateDisplayState() -{ - if (!g_host_display) - return; - - // this expects the context to get moved back to us afterwards - 
g_host_display->DoneCurrent(); - - updateDisplayRequested(m_is_fullscreen, m_is_rendering_to_main && !m_is_fullscreen, m_is_surfaceless); - if (!g_host_display->MakeCurrent()) - Panic("Failed to make device context current after updating"); - - m_is_exclusive_fullscreen = g_host_display->IsFullscreen(); - ImGuiManager::WindowResized(); - System::HostDisplayResized(); - - if (!System::IsShutdown()) - { - System::UpdateSoftwareCursor(); - - if (!FullscreenUI::IsInitialized() || System::IsPaused()) - redrawDisplayWindow(); - } - - System::UpdateSpeedLimiterState(); -} - -void EmuThread::releaseHostDisplay() -{ - if (!g_host_display) - return; - - CommonHost::ReleaseHostDisplayResources(); - FullscreenUI::Shutdown(); - ImGuiManager::Shutdown(); - g_host_display.reset(); - emit destroyDisplayRequested(); - m_is_fullscreen = false; -} - void Host::OnSystemStarting() { CommonHost::OnSystemStarting(); @@ -844,7 +766,7 @@ void Host::OnSystemPaused() emit g_emu_thread->systemPaused(); g_emu_thread->startBackgroundControllerPollTimer(); - g_emu_thread->renderDisplay(false); + Host::InvalidateDisplay(); } void Host::OnSystemResumed() @@ -1246,7 +1168,7 @@ void EmuThread::singleStepCPU() return; System::SingleStepCPU(); - renderDisplay(false); + Host::InvalidateDisplay(); } void EmuThread::dumpRAM(const QString& filename) @@ -1449,11 +1371,11 @@ void EmuThread::run() m_event_loop->processEvents(QEventLoop::AllEvents); CommonHost::PumpMessagesOnCPUThread(); - if (g_host_display) + if (g_gpu_device) { - renderDisplay(false); - if (!g_host_display->IsVsyncEnabled()) - g_host_display->ThrottlePresentation(); + Host::RenderDisplay(false); + if (!g_gpu_device->IsVsyncEnabled()) + g_gpu_device->ThrottlePresentation(); } } } @@ -1468,35 +1390,8 @@ void EmuThread::run() moveToThread(m_ui_thread); } -void EmuThread::renderDisplay(bool skip_present) +void Host::BeginPresentFrame() { - // acquire for IO.MousePos. 
- std::atomic_thread_fence(std::memory_order_acquire); - - if (!skip_present) - { - FullscreenUI::Render(); - ImGuiManager::RenderTextOverlays(); - ImGuiManager::RenderOSDMessages(); - } - - // Debug windows are always rendered, otherwise mouse input breaks on skip. - ImGuiManager::RenderOverlayWindows(); - ImGuiManager::RenderDebugWindows(); - - g_host_display->Render(skip_present); - - ImGuiManager::NewFrame(); -} - -void Host::InvalidateDisplay() -{ - g_emu_thread->renderDisplay(false); -} - -void Host::RenderDisplay(bool skip_present) -{ - g_emu_thread->renderDisplay(skip_present); } void EmuThread::wakeThread() @@ -1605,39 +1500,34 @@ void Host::CommitBaseSettingChanges() QtHost::QueueSettingsSave(); } -bool Host::AcquireHostDisplay(RenderAPI api) +std::optional Host::AcquireRenderWindow(bool recreate_window) { - return g_emu_thread->acquireHostDisplay(api); + return g_emu_thread->acquireRenderWindow(recreate_window); } -void Host::ReleaseHostDisplay() +void Host::ReleaseRenderWindow() { - if (g_emu_thread->isRunningFullscreenUI()) - { - // keep display alive when running fsui - return; - } - - g_emu_thread->releaseHostDisplay(); + g_emu_thread->releaseRenderWindow(); } void EmuThread::updatePerformanceCounters() { - GPURenderer renderer = GPURenderer::Count; + const RenderAPI render_api = g_gpu_device ? g_gpu_device->GetRenderAPI() : RenderAPI::None; + const bool hardware_renderer = g_gpu && g_gpu->IsHardwareRenderer(); u32 render_width = 0; u32 render_height = 0; if (g_gpu) - { - renderer = g_gpu->GetRendererType(); std::tie(render_width, render_height) = g_gpu->GetEffectiveDisplayResolution(); - } - if (renderer != m_last_renderer) + if (render_api != m_last_render_api || hardware_renderer != m_last_hardware_renderer) { + const QString renderer_str = hardware_renderer ? 
QString::fromUtf8(GPUDevice::RenderAPIToString(render_api)) : + qApp->translate("GPURenderer", "Software"); QMetaObject::invokeMethod(g_main_window->getStatusRendererWidget(), "setText", Qt::QueuedConnection, - Q_ARG(const QString&, QString::fromUtf8(Settings::GetRendererName(renderer)))); - m_last_renderer = renderer; + Q_ARG(const QString&, renderer_str)); + m_last_render_api = render_api; + m_last_hardware_renderer = hardware_renderer; } if (render_width != m_last_render_width || render_height != m_last_render_height) { @@ -1674,7 +1564,8 @@ void EmuThread::resetPerformanceCounters() m_last_video_fps = std::numeric_limits::infinity(); m_last_render_width = std::numeric_limits::max(); m_last_render_height = std::numeric_limits::max(); - m_last_renderer = GPURenderer::Count; + m_last_render_api = RenderAPI::None; + m_last_hardware_renderer = false; QString blank; QMetaObject::invokeMethod(g_main_window->getStatusRendererWidget(), "setText", Qt::QueuedConnection, diff --git a/src/duckstation-qt/qthost.h b/src/duckstation-qt/qthost.h index 3b85afc20..db70915bd 100644 --- a/src/duckstation-qt/qthost.h +++ b/src/duckstation-qt/qthost.h @@ -2,16 +2,20 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + +#include "gdbserver.h" +#include "qtutils.h" + #include "core/host.h" -#include "util/host_display.h" #include "core/host_settings.h" #include "core/system.h" #include "core/types.h" #include "core/common_host.h" #include "core/game_list.h" + +#include "util/gpu_device.h" #include "util/input_manager.h" -#include "gdbserver.h" -#include "qtutils.h" + #include #include #include @@ -39,7 +43,7 @@ class QTranslator; class INISettingsInterface; -class HostDisplay; +class GPUDevice; class MainWindow; class DisplayWidget; @@ -95,10 +99,9 @@ public: ALWAYS_INLINE bool isSurfaceless() const { return m_is_surfaceless; } ALWAYS_INLINE bool isRunningFullscreenUI() const { return m_run_fullscreen_ui; } - bool acquireHostDisplay(RenderAPI api); + std::optional 
acquireRenderWindow(bool recreate_window); void connectDisplaySignals(DisplayWidget* widget); - void releaseHostDisplay(); - void renderDisplay(bool skip_present); + void releaseRenderWindow(); void startBackgroundControllerPollTimer(); void stopBackgroundControllerPollTimer(); @@ -132,11 +135,11 @@ Q_SIGNALS: void systemPaused(); void systemResumed(); void gameListRefreshed(); - bool createDisplayRequested(bool fullscreen, bool render_to_main); - bool updateDisplayRequested(bool fullscreen, bool render_to_main, bool surfaceless); - void displaySizeRequested(qint32 width, qint32 height); + std::optional onAcquireRenderWindowRequested(bool recreate_window, bool fullscreen, bool render_to_main, + bool surfaceless, bool use_main_window_pos); + void onResizeRenderWindowRequested(qint32 width, qint32 height); + void onReleaseRenderWindowRequested(); void focusDisplayWidgetRequested(); - void destroyDisplayRequested(); void runningGameChanged(const QString& filename, const QString& game_serial, const QString& game_title); void inputProfileLoaded(); void mouseModeRequested(bool relative, bool hide_cursor); @@ -180,7 +183,7 @@ public Q_SLOTS: void saveScreenshot(); void redrawDisplayWindow(); void toggleFullscreen(); - void setFullscreen(bool fullscreen); + void setFullscreen(bool fullscreen, bool allow_render_to_main); void setSurfaceless(bool surfaceless); void requestDisplaySize(float scale); void loadCheatList(const QString& filename); @@ -194,7 +197,7 @@ private Q_SLOTS: void onDisplayWindowMouseMoveEvent(bool relative, float x, float y); void onDisplayWindowMouseButtonEvent(int button, bool pressed); void onDisplayWindowMouseWheelEvent(const QPoint& delta_angle); - void onDisplayWindowResized(int width, int height); + void onDisplayWindowResized(int width, int height, float scale); void onDisplayWindowKeyEvent(int key, bool pressed); void onDisplayWindowTextEntered(const QString& text); void doBackgroundControllerPoll(); @@ -210,7 +213,6 @@ private: void 
createBackgroundControllerPollTimer(); void destroyBackgroundControllerPollTimer(); void setInitialState(std::optional override_fullscreen); - void updateDisplayState(); QThread* m_ui_thread; QSemaphore m_started_semaphore; @@ -233,7 +235,8 @@ private: float m_last_video_fps = std::numeric_limits::infinity(); u32 m_last_render_width = std::numeric_limits::max(); u32 m_last_render_height = std::numeric_limits::max(); - GPURenderer m_last_renderer = GPURenderer::Count; + RenderAPI m_last_render_api = RenderAPI::None; + bool m_last_hardware_renderer = false; }; extern EmuThread* g_emu_thread; diff --git a/src/duckstation-qt/qtutils.h b/src/duckstation-qt/qtutils.h index 5a897f21c..8ed8c66c1 100644 --- a/src/duckstation-qt/qtutils.h +++ b/src/duckstation-qt/qtutils.h @@ -2,8 +2,11 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "common/window_info.h" + +#include "util/window_info.h" + #include "common/types.h" + #include #include #include diff --git a/src/duckstation-regtest/regtest_host.cpp b/src/duckstation-regtest/regtest_host.cpp index a4612ef61..e1699a751 100644 --- a/src/duckstation-regtest/regtest_host.cpp +++ b/src/duckstation-regtest/regtest_host.cpp @@ -14,7 +14,7 @@ #include "core/host_settings.h" #include "core/system.h" #include "scmversion/scmversion.h" -#include "util/host_display.h" +#include "util/gpu_device.h" #include "util/imgui_manager.h" #include "util/input_manager.h" #include @@ -88,6 +88,7 @@ bool RegTestHost::InitializeConfig() SettingsInterface& si = *s_base_settings_interface.get(); g_settings.Save(si); si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(GPURenderer::Software)); + si.SetBoolValue("GPU", "DisableShaderCache", true); si.SetStringValue("Pad1", "Type", Settings::GetControllerTypeName(ControllerType::AnalogController)); si.SetStringValue("Pad2", "Type", Settings::GetControllerTypeName(ControllerType::None)); si.SetStringValue("MemoryCards", "Card1Type", 
Settings::GetMemoryCardTypeName(MemoryCardType::NonPersistent)); @@ -288,58 +289,26 @@ void Host::SetFullscreen(bool enabled) // } -bool Host::AcquireHostDisplay(RenderAPI api) +std::optional Host::AcquireRenderWindow(bool recreate_window) { WindowInfo wi; wi.SetSurfaceless(); - - g_host_display = Host::CreateDisplayForAPI(api); - if (g_host_display && !g_host_display->CreateDevice(wi, false)) - { - Log_ErrorPrintf("Failed to create host display."); - g_host_display.reset(); - return false; - } - - if (!g_host_display->MakeCurrent() || !g_host_display->SetupDevice() || !ImGuiManager::Initialize() || - !CommonHost::CreateHostDisplayResources()) - { - Log_ErrorPrintf("Failed to setup host display."); - ImGuiManager::Shutdown(); - CommonHost::ReleaseHostDisplayResources(); - g_host_display.reset(); - return false; - } - - return true; + return wi; } -void Host::ReleaseHostDisplay() +void Host::ReleaseRenderWindow() { - if (!g_host_display) - return; - - CommonHost::ReleaseHostDisplayResources(); - ImGuiManager::Shutdown(); - g_host_display.reset(); + // } -void Host::RenderDisplay(bool skip_present) +void Host::BeginPresentFrame() { const u32 frame = System::GetFrameNumber(); if (s_frame_dump_interval > 0 && (s_frame_dump_interval == 1 || (frame % s_frame_dump_interval) == 0)) { std::string dump_filename(RegTestHost::GetFrameDumpFilename(frame)); - g_host_display->WriteDisplayTextureToFile(std::move(dump_filename)); + g_gpu_device->WriteDisplayTextureToFile(std::move(dump_filename)); } - - g_host_display->Render(true); - ImGuiManager::NewFrame(); -} - -void Host::InvalidateDisplay() -{ - // } void Host::OpenURL(const std::string_view& url) diff --git a/src/scmversion/gen_scmversion.sh b/src/scmversion/gen_scmversion.sh index 3d25e5ede..9c1dacaba 100755 --- a/src/scmversion/gen_scmversion.sh +++ b/src/scmversion/gen_scmversion.sh @@ -4,7 +4,7 @@ VERSION_FILE="scmversion.cpp" CURDIR=$(pwd) if [ "$(uname -s)" = "Darwin" ]; then - cd "$(dirname $(python -c 'import 
os,sys;print(os.path.realpath(sys.argv[1]))' "$0"))" + cd "$(dirname $(python3 -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' "$0"))" else cd $(dirname $(readlink -f $0)) fi diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 1a8ef9515..e2f9c4ed4 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -21,8 +21,12 @@ add_library(util cd_xa.h cue_parser.cpp cue_parser.h - host_display.cpp - host_display.h + gpu_device.cpp + gpu_device.h + gpu_shader_cache.cpp + gpu_shader_cache.h + gpu_texture.cpp + gpu_texture.h imgui_fullscreen.cpp imgui_fullscreen.h imgui_manager.cpp @@ -46,8 +50,8 @@ add_library(util postprocessing_chain.h postprocessing_shader.cpp postprocessing_shader.h - postprocessing_shadergen.cpp - postprocessing_shadergen.h + postprocessing_shader_glsl.cpp + postprocessing_shader_glsl.h shadergen.cpp shadergen.h shiftjis.cpp @@ -56,12 +60,14 @@ add_library(util state_wrapper.h wav_writer.cpp wav_writer.h + window_info.cpp + window_info.h ) target_include_directories(util PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(util PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") target_link_libraries(util PUBLIC common simpleini imgui) -target_link_libraries(util PRIVATE stb libchdr zlib soundtouch) +target_link_libraries(util PRIVATE stb libchdr zlib soundtouch Zstd::Zstd) if(ENABLE_CUBEB) target_sources(util PRIVATE @@ -72,23 +78,118 @@ if(ENABLE_CUBEB) target_link_libraries(util PRIVATE cubeb) endif() +if(USE_X11) + target_compile_definitions(util PRIVATE "-DUSE_X11=1") + target_include_directories(util PRIVATE "${X11_INCLUDE_DIR}" "${X11_Xrandr_INCLUDE_PATH}") + target_link_libraries(util PRIVATE "${X11_LIBRARIES}" "${X11_Xrandr_LIB}") +endif() + +if(USE_WAYLAND) + target_compile_definitions(util PRIVATE "-DUSE_WAYLAND=1") +elseif(SUPPORTS_WAYLAND) + message(WARNING "Wayland support for renderers is disabled.\nDuckStation will FAIL to start on Wayland.") +endif() + if(ENABLE_OPENGL) target_sources(util 
PRIVATE - opengl_host_display.cpp - opengl_host_display.h - imgui_impl_opengl3.cpp - imgui_impl_opengl3.h + gl/context.cpp + gl/context.h + opengl_device.cpp + opengl_device.h + opengl_loader.h + opengl_pipeline.cpp + opengl_pipeline.h + opengl_stream_buffer.cpp + opengl_stream_buffer.h + opengl_texture.cpp + opengl_texture.h ) + target_compile_definitions(util PUBLIC "WITH_OPENGL=1") target_link_libraries(util PRIVATE glad) + + if(WIN32) + target_sources(util PRIVATE + gl/context_wgl.cpp + gl/context_wgl.h + ) + endif() + + if(LINUX OR FREEBSD OR ANDROID) + target_sources(util PRIVATE + gl/context_egl.cpp + gl/context_egl.h + ) + target_compile_definitions(util PRIVATE "-DUSE_EGL=1") + + if(USE_X11) + target_sources(util PRIVATE + gl/context_egl_x11.cpp + gl/context_egl_x11.h + ) + + # We set EGL_NO_X11 because otherwise X comes in with its macros and breaks + # a bunch of files from compiling, if we include the EGL headers. This just + # makes the data types opaque, we can still use it with X11 if needed. 
+ target_compile_definitions(util PRIVATE "-DEGL_NO_X11=1") + endif() + if(USE_WAYLAND) + target_sources(util PRIVATE + gl/context_egl_wayland.cpp + gl/context_egl_wayland.h + ) + endif() + if(ANDROID) + target_sources(util PRIVATE + gl/context_egl_android.cpp + gl/context_egl_android.h + ) + endif() + endif() + + if(APPLE) + target_sources(util PRIVATE + gl/context_agl.mm + gl/context_agl.h + ) + endif() +endif() + +if(ENABLE_VULKAN OR APPLE) + target_sources(util PRIVATE + spirv_compiler.cpp + spirv_compiler.h + ) + target_link_libraries(util PRIVATE glslang) + if(APPLE) + target_link_libraries(util PRIVATE spirv-cross) + endif() endif() if(ENABLE_VULKAN) target_sources(util PRIVATE - imgui_impl_vulkan.cpp - imgui_impl_vulkan.h - vulkan_host_display.cpp - vulkan_host_display.h + vulkan_builders.cpp + vulkan_builders.h + vulkan_device.cpp + vulkan_device.h + vulkan_entry_points.h + vulkan_entry_points.inl + vulkan_loader.cpp + vulkan_loader.h + vulkan_pipeline.cpp + vulkan_pipeline.h + vulkan_stream_buffer.cpp + vulkan_stream_buffer.h + vulkan_swap_chain.cpp + vulkan_swap_chain.h + vulkan_texture.cpp + vulkan_texture.h ) + target_compile_definitions(util PUBLIC "WITH_VULKAN=1") + + if(APPLE) + # Needed for Vulkan Swap Chain. 
+ target_link_libraries(util PRIVATE "objc") + endif() endif() if(SDL2_FOUND) @@ -107,11 +208,6 @@ if(SDL2_FOUND) endif() endif() -if(USE_X11) - target_compile_definitions(util PRIVATE "-DUSE_X11=1") - target_include_directories(util PRIVATE "${X11_INCLUDE_DIR}") -endif() - if(USE_DBUS) target_compile_definitions(util PRIVATE USE_DBUS) find_package(PkgConfig REQUIRED) @@ -122,16 +218,8 @@ endif() if(WIN32) target_sources(util PRIVATE - d3d11_host_display.cpp - d3d11_host_display.h - d3d12_host_display.cpp - d3d12_host_display.h dinput_source.cpp dinput_source.h - imgui_impl_dx11.cpp - imgui_impl_dx11.h - imgui_impl_dx12.cpp - imgui_impl_dx12.h platform_misc_win32.cpp win32_raw_input_source.cpp win32_raw_input_source.h @@ -140,13 +228,21 @@ if(WIN32) xinput_source.cpp xinput_source.h ) - target_link_libraries(util PRIVATE d3d11.lib dxgi.lib winmm.lib) + target_link_libraries(util PRIVATE d3d11.lib d3d12.lib d3dcompiler.lib dxgi.lib winmm.lib) elseif(APPLE) - find_library(IOK_LIBRARY IOKit REQUIRED) - target_link_libraries(util PRIVATE "${IOK_LIBRARY}") target_sources(util PRIVATE + cocoa_tools.h + cocoa_tools.mm + metal_device.h + metal_device.mm + metal_stream_buffer.h + metal_stream_buffer.mm platform_misc_mac.mm ) + find_library(IOK_LIBRARY IOKit REQUIRED) + find_library(METAL_LIBRARY Metal) + find_library(QUARTZCORE_LIBRARY QuartzCore) + target_link_libraries(util PRIVATE ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY} ${IOK_LIBRARY}) elseif(NOT ANDROID) target_sources(util PRIVATE platform_misc_unix.cpp diff --git a/src/util/cocoa_tools.h b/src/util/cocoa_tools.h new file mode 100644 index 000000000..5955a5c90 --- /dev/null +++ b/src/util/cocoa_tools.h @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include + +#include + +namespace CocoaTools { + NSString* StringViewToNSString(const std::string_view& str); +} + diff --git a/src/util/cocoa_tools.mm b/src/util/cocoa_tools.mm new file 
mode 100644 index 000000000..0464349ac --- /dev/null +++ b/src/util/cocoa_tools.mm @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "cocoa_tools.h" + +NSString* CocoaTools::StringViewToNSString(const std::string_view& str) +{ + if (str.empty()) + return nil; + + return [[[NSString alloc] initWithBytes:str.data() + length:static_cast(str.length()) + encoding:NSUTF8StringEncoding] autorelease]; +} + diff --git a/src/util/cubeb_audio_stream.cpp b/src/util/cubeb_audio_stream.cpp index b5fdc468f..bb8082165 100644 --- a/src/util/cubeb_audio_stream.cpp +++ b/src/util/cubeb_audio_stream.cpp @@ -15,7 +15,6 @@ Log_SetChannel(CubebAudioStream); #ifdef _WIN32 #include "common/windows_headers.h" #include -#pragma comment(lib, "Ole32.lib") #endif static void StateCallback(cubeb_stream* stream, void* user_ptr, cubeb_state state); diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp new file mode 100644 index 000000000..3c3cbb480 --- /dev/null +++ b/src/util/d3d11_device.cpp @@ -0,0 +1,1004 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d11_device.h" +#include "core/host_settings.h" // TODO: Remove me +#include "d3d11_pipeline.h" +#include "d3d11_texture.h" +#include "d3d_common.h" +#include "postprocessing_chain.h" // TODO: Remove me + +#include "common/align.h" +#include "common/assert.h" +#include "common/file_system.h" +#include "common/log.h" +#include "common/path.h" +#include "common/rectangle.h" +#include "common/string_util.h" + +#include "fmt/format.h" + +#include +#include +#include + +Log_SetChannel(D3D11Device); + +// We need to synchronize instance creation because of adapter enumeration from the UI thread. 
+static std::mutex s_instance_mutex; + +static constexpr std::array s_clear_color = {}; +static constexpr GPUTexture::Format s_swap_chain_format = GPUTexture::Format::RGBA8; + +void SetD3DDebugObjectName(ID3D11DeviceChild* obj, const std::string_view& name) +{ +#ifdef _DEBUG + // WKPDID_D3DDebugObjectName + static constexpr GUID guid = {0x429b8c22, 0x9188, 0x4b0c, 0x87, 0x42, 0xac, 0xb0, 0xbf, 0x85, 0xc2, 0x00}; + + UINT existing_data_size; + HRESULT hr = obj->GetPrivateData(guid, &existing_data_size, nullptr); + if (SUCCEEDED(hr) && existing_data_size > 0) + return; + + obj->SetPrivateData(guid, static_cast(name.length()), name.data()); +#endif +} + +D3D11Device::D3D11Device() = default; + +D3D11Device::~D3D11Device() +{ + // Should all be torn down by now. + Assert(!m_device); +} + +RenderAPI D3D11Device::GetRenderAPI() const +{ + return RenderAPI::D3D11; +} + +bool D3D11Device::HasSurface() const +{ + return static_cast(m_swap_chain); +} + +bool D3D11Device::CreateDevice(const std::string_view& adapter, bool threaded_presentation) +{ + std::unique_lock lock(s_instance_mutex); + + UINT create_flags = 0; + if (m_debug_device) + create_flags |= D3D11_CREATE_DEVICE_DEBUG; + + m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device); + if (!m_dxgi_factory) + return false; + + ComPtr dxgi_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter); + + static constexpr std::array requested_feature_levels = { + {D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}}; + + ComPtr temp_device; + ComPtr temp_context; + HRESULT hr = + D3D11CreateDevice(dxgi_adapter.Get(), dxgi_adapter ? 
D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE, nullptr, + create_flags, requested_feature_levels.data(), static_cast(requested_feature_levels.size()), + D3D11_SDK_VERSION, temp_device.GetAddressOf(), nullptr, temp_context.GetAddressOf()); + + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create D3D device: 0x%08X", hr); + return false; + } + else if (FAILED(hr = temp_device.As(&m_device)) || FAILED(hr = temp_context.As(&m_context))) + { + Log_ErrorPrintf("Failed to get D3D11.1 device: 0x%08X", hr); + return false; + } + + // we re-grab these later, see below + dxgi_adapter.Reset(); + temp_context.Reset(); + temp_device.Reset(); + + if (m_debug_device && IsDebuggerPresent()) + { + ComPtr info; + hr = m_device.As(&info); + if (SUCCEEDED(hr)) + { + info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_ERROR, TRUE); + info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_WARNING, TRUE); + } + } + +#ifdef _DEBUG + if (m_debug_device) + m_context.As(&m_annotation); +#endif + + ComPtr dxgi_device; + if (SUCCEEDED(m_device.As(&dxgi_device)) && + SUCCEEDED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.GetAddressOf())))) + Log_InfoPrintf("D3D Adapter: %s", D3DCommon::GetAdapterName(dxgi_adapter.Get()).c_str()); + else + Log_ErrorPrint("Failed to obtain D3D adapter name."); + + BOOL allow_tearing_supported = false; + hr = m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, + sizeof(allow_tearing_supported)); + m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE); + + SetFeatures(); + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain()) + return false; + + if (!CreateBuffers()) + return false; + + return true; +} + +void D3D11Device::DestroyDevice() +{ + std::unique_lock lock(s_instance_mutex); + + DestroyStagingBuffer(); + DestroyBuffers(); + m_context.Reset(); + m_device.Reset(); +} + +void D3D11Device::SetFeatures() +{ + const D3D_FEATURE_LEVEL feature_level = 
m_device->GetFeatureLevel(); + + m_max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + m_max_multisamples = 1; + for (u32 multisamples = 2; multisamples < D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) + { + UINT num_quality_levels; + if (SUCCEEDED( + m_device->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, multisamples, &num_quality_levels)) && + num_quality_levels > 0) + { + m_max_multisamples = multisamples; + } + } + + m_features.dual_source_blend = true; + m_features.per_sample_shading = (feature_level >= D3D_FEATURE_LEVEL_10_1); + m_features.noperspective_interpolation = true; + m_features.supports_texture_buffers = true; + m_features.texture_buffers_emulated_with_ssbo = false; + m_features.partial_msaa_resolve = false; + m_features.gpu_timing = true; + m_features.shader_cache = true; + m_features.pipeline_cache = false; +} + +bool D3D11Device::CreateSwapChain() +{ + if (m_window_info.type != WindowInfo::Type::Win32) + return false; + + const DXGI_FORMAT dxgi_format = D3DCommon::GetFormatMapping(s_swap_chain_format).resource_format; + + const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle); + RECT client_rc{}; + GetClientRect(window_hwnd, &client_rc); + + DXGI_MODE_DESC fullscreen_mode = {}; + ComPtr fullscreen_output; + if (Host::IsFullscreen()) + { + u32 fullscreen_width, fullscreen_height; + float fullscreen_refresh_rate; + m_is_exclusive_fullscreen = + GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) && + D3DCommon::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.Get(), client_rc, fullscreen_width, + fullscreen_height, fullscreen_refresh_rate, dxgi_format, + &fullscreen_mode, fullscreen_output.GetAddressOf()); + } + else + { + m_is_exclusive_fullscreen = false; + } + + m_using_flip_model_swap_chain = + !Host::GetBoolSettingValue("Display", "UseBlitSwapChain", false) || m_is_exclusive_fullscreen; + + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; + 
swap_chain_desc.Width = static_cast(client_rc.right - client_rc.left); + swap_chain_desc.Height = static_cast(client_rc.bottom - client_rc.top); + swap_chain_desc.Format = dxgi_format; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.BufferCount = 3; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.SwapEffect = m_using_flip_model_swap_chain ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_DISCARD; + + m_using_allow_tearing = (m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen); + if (m_using_allow_tearing) + swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + + HRESULT hr = S_OK; + + if (m_is_exclusive_fullscreen) + { + DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc; + DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {}; + + fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + fs_sd_desc.Width = fullscreen_mode.Width; + fs_sd_desc.Height = fullscreen_mode.Height; + fs_desc.RefreshRate = fullscreen_mode.RefreshRate; + fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering; + fs_desc.Scaling = fullscreen_mode.Scaling; + fs_desc.Windowed = FALSE; + + Log_VerbosePrintf("Creating a %dx%d exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height); + hr = m_dxgi_factory->CreateSwapChainForHwnd(m_device.Get(), window_hwnd, &fs_sd_desc, &fs_desc, + fullscreen_output.Get(), m_swap_chain.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + Log_WarningPrintf("Failed to create fullscreen swap chain, trying windowed."); + m_is_exclusive_fullscreen = false; + m_using_allow_tearing = m_allow_tearing_supported && m_using_flip_model_swap_chain; + } + } + + if (!m_is_exclusive_fullscreen) + { + Log_VerbosePrintf("Creating a %dx%d %s windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height, + m_using_flip_model_swap_chain ? 
"flip-discard" : "discard"); + hr = m_dxgi_factory->CreateSwapChainForHwnd(m_device.Get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, + m_swap_chain.ReleaseAndGetAddressOf()); + } + + if (FAILED(hr) && m_using_flip_model_swap_chain) + { + Log_WarningPrintf("Failed to create a flip-discard swap chain, trying discard."); + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + swap_chain_desc.Flags = 0; + m_using_flip_model_swap_chain = false; + m_using_allow_tearing = false; + + hr = m_dxgi_factory->CreateSwapChainForHwnd(m_device.Get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, + m_swap_chain.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateSwapChainForHwnd failed: 0x%08X", hr); + return false; + } + } + + // we need the specific factory for the device, otherwise MakeWindowAssociation() is flaky. + ComPtr parent_factory; + if (FAILED(m_swap_chain->GetParent(IID_PPV_ARGS(parent_factory.GetAddressOf()))) || + FAILED(parent_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES))) + { + Log_WarningPrintf("MakeWindowAssociation() to disable ALT+ENTER failed"); + } + + if (!CreateSwapChainRTV()) + { + DestroySwapChain(); + return false; + } + + // Render a frame as soon as possible to clear out whatever was previously being displayed. + m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); + m_swap_chain->Present(0, m_using_allow_tearing ? 
DXGI_PRESENT_ALLOW_TEARING : 0); + return true; +} + +bool D3D11Device::CreateSwapChainRTV() +{ + ComPtr backbuffer; + HRESULT hr = m_swap_chain->GetBuffer(0, IID_PPV_ARGS(backbuffer.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("GetBuffer for RTV failed: 0x%08X", hr); + return false; + } + + D3D11_TEXTURE2D_DESC backbuffer_desc; + backbuffer->GetDesc(&backbuffer_desc); + + CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2D, backbuffer_desc.Format, 0, 0, + backbuffer_desc.ArraySize); + hr = m_device->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, m_swap_chain_rtv.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateRenderTargetView for swap chain failed: 0x%08X", hr); + m_swap_chain_rtv.Reset(); + return false; + } + + m_window_info.surface_width = backbuffer_desc.Width; + m_window_info.surface_height = backbuffer_desc.Height; + m_window_info.surface_format = s_swap_chain_format; + Log_VerbosePrintf("Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height); + + if (m_window_info.type == WindowInfo::Type::Win32) + { + BOOL fullscreen = FALSE; + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && + SUCCEEDED(m_swap_chain->GetDesc(&desc))) + { + m_window_info.surface_refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / + static_cast(desc.BufferDesc.RefreshRate.Denominator); + } + else + { + m_window_info.surface_refresh_rate = 0.0f; + } + } + + return true; +} + +void D3D11Device::DestroySwapChain() +{ + if (!m_swap_chain) + return; + + m_swap_chain_rtv.Reset(); + + // switch out of fullscreen before destroying + BOOL is_fullscreen; + if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen) + m_swap_chain->SetFullscreenState(FALSE, nullptr); + + m_swap_chain.Reset(); + m_is_exclusive_fullscreen = false; +} + +bool D3D11Device::UpdateWindow() +{ + DestroySwapChain(); + 
+ if (!AcquireWindow(false)) + return false; + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain()) + { + Log_ErrorPrintf("Failed to create swap chain on updated window"); + return false; + } + + return true; +} + +void D3D11Device::DestroySurface() +{ + DestroySwapChain(); +} + +void D3D11Device::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) +{ + if (!m_swap_chain || m_is_exclusive_fullscreen) + return; + + m_window_info.surface_scale = new_window_scale; + + if (m_window_info.surface_width == static_cast(new_window_width) && + m_window_info.surface_height == static_cast(new_window_height)) + { + return; + } + + m_swap_chain_rtv.Reset(); + + HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, + m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); + if (FAILED(hr)) + Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); + + if (!CreateSwapChainRTV()) + Panic("Failed to recreate swap chain RTV after resize"); +} + +bool D3D11Device::SupportsExclusiveFullscreen() const +{ + return true; +} + +std::string D3D11Device::GetDriverInfo() const +{ + const D3D_FEATURE_LEVEL fl = m_device->GetFeatureLevel(); + std::string ret = + fmt::format("{} ({})\n", D3DCommon::GetFeatureLevelString(fl), D3DCommon::GetFeatureLevelShaderModelString(fl)); + + ComPtr dxgi_dev; + if (m_device.As(&dxgi_dev)) + { + ComPtr dxgi_adapter; + if (SUCCEEDED(dxgi_dev->GetAdapter(dxgi_adapter.GetAddressOf()))) + { + DXGI_ADAPTER_DESC desc; + if (SUCCEEDED(dxgi_adapter->GetDesc(&desc))) + { + ret += StringUtil::StdStringFromFormat("VID: 0x%04X PID: 0x%04X\n", desc.VendorId, desc.DeviceId); + ret += StringUtil::WideStringToUTF8String(desc.Description); + ret += "\n"; + + const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid)); + if (!driver_version.empty()) + { + ret += "Driver Version: "; + ret += driver_version; + } + } + } + } + + return ret; +} + +bool 
D3D11Device::CreateBuffers() +{ + if (!m_vertex_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE) || + !m_index_buffer.Create(m_device.Get(), D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE) || + !m_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, UNIFORM_BUFFER_SIZE)) + { + Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); + return false; + } + + // Index buffer never changes :) + m_context->IASetIndexBuffer(m_index_buffer.GetD3DBuffer(), DXGI_FORMAT_R16_UINT, 0); + return true; +} + +void D3D11Device::DestroyBuffers() +{ + m_uniform_buffer.Destroy(); + m_vertex_buffer.Destroy(); + m_index_buffer.Destroy(); +} + +void D3D11Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) +{ + DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); + DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); + DebugAssert((src_y + height) <= src->GetMipHeight(src_level)); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); + + D3D11Texture* dst11 = static_cast(dst); + D3D11Texture* src11 = static_cast(src); + + if (dst11->IsRenderTargetOrDepthStencil()) + { + if (src11->GetState() == GPUTexture::State::Cleared) + { + if (src11->GetWidth() == dst11->GetWidth() && src11->GetHeight() == dst11->GetHeight()) + { + // pass clear through + dst11->m_state = src11->m_state; + dst11->m_clear_value = src11->m_clear_value; + return; + } + } + else if (dst_x == 0 && dst_y == 0 && width == dst11->GetMipWidth(dst_level) && + height == dst11->GetMipHeight(dst_level)) + { + m_context->DiscardView(dst11->GetRTVOrDSV()); + dst11->SetState(GPUTexture::State::Dirty); + } + + dst11->CommitClear(m_context.Get()); + } + + 
src11->CommitClear(m_context.Get()); + + const CD3D11_BOX src_box(static_cast(src_x), static_cast(src_y), 0, static_cast(src_x + width), + static_cast(src_y + height), 1); + m_context->CopySubresourceRegion(dst11->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), + dst_x, dst_y, 0, src11->GetD3DTexture(), + D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), &src_box); +} + +void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) +{ + DebugAssert((src_x + width) <= src->GetWidth()); + DebugAssert((src_y + height) <= src->GetHeight()); + DebugAssert(src->IsMultisampled()); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); + DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + + // DX11 can't resolve partial rects. 
+ Assert(src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && + dst_y == 0 && width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level)); + + D3D11Texture* dst11 = static_cast(dst); + D3D11Texture* src11 = static_cast(src); + + src11->CommitClear(m_context.Get()); + dst11->CommitClear(m_context.Get()); + + m_context->ResolveSubresource(dst11->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), + src11->GetD3DTexture(), 0, dst11->GetDXGIFormat()); +} + +void D3D11Device::ClearRenderTarget(GPUTexture* t, u32 c) +{ + GPUDevice::ClearRenderTarget(t, c); + if (m_current_framebuffer && m_current_framebuffer->GetRT() == t) + static_cast(t)->CommitClear(m_context.Get()); +} + +void D3D11Device::ClearDepth(GPUTexture* t, float d) +{ + GPUDevice::ClearDepth(t, d); + if (m_current_framebuffer && m_current_framebuffer->GetDS() == t) + static_cast(t)->CommitClear(m_context.Get()); +} + +void D3D11Device::InvalidateRenderTarget(GPUTexture* t) +{ + GPUDevice::InvalidateRenderTarget(t); + if (m_current_framebuffer && (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) + static_cast(t)->CommitClear(m_context.Get()); +} + +bool D3D11Device::GetHostRefreshRate(float* refresh_rate) +{ + if (m_swap_chain && m_is_exclusive_fullscreen) + { + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(m_swap_chain->GetDesc(&desc)) && desc.BufferDesc.RefreshRate.Numerator > 0 && + desc.BufferDesc.RefreshRate.Denominator > 0) + { + Log_InfoPrintf("using fs rr: %u %u", desc.BufferDesc.RefreshRate.Numerator, + desc.BufferDesc.RefreshRate.Denominator); + *refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / + static_cast(desc.BufferDesc.RefreshRate.Denominator); + return true; + } + } + + return GPUDevice::GetHostRefreshRate(refresh_rate); +} + +void D3D11Device::SetVSync(bool enabled) +{ + m_vsync_enabled = enabled; +} + +bool D3D11Device::BeginPresent(bool skip_present) +{ + 
if (skip_present) + return false; + + if (!m_swap_chain) + { + // Note: Really slow on Intel... + m_context->Flush(); + return false; + } + + // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode. + // This might get called repeatedly if it takes a while to switch back, that's the host's problem. + BOOL is_fullscreen; + if (m_is_exclusive_fullscreen && + (FAILED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) || !is_fullscreen)) + { + Host::SetFullscreen(false); + return false; + } + + // When using vsync, the time here seems to include the time for the buffer to become available. + // This blows our our GPU usage number considerably, so read the timestamp before the final blit + // in this configuration. It does reduce accuracy a little, but better than seeing 100% all of + // the time, when it's more like a couple of percent. + if (m_vsync_enabled && m_gpu_timing_enabled) + PopTimestampQuery(); + + static constexpr float clear_color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), clear_color); + m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); + m_current_framebuffer = nullptr; + return true; +} + +void D3D11Device::EndPresent() +{ + DebugAssert(!m_current_framebuffer); + + if (!m_vsync_enabled && m_gpu_timing_enabled) + PopTimestampQuery(); + + if (!m_vsync_enabled && m_using_allow_tearing) + m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); + else + m_swap_chain->Present(BoolToUInt32(m_vsync_enabled), 0); + + if (m_gpu_timing_enabled) + KickTimestampQuery(); +} + +GPUDevice::AdapterAndModeList D3D11Device::StaticGetAdapterAndModeList() +{ + AdapterAndModeList ret; + std::unique_lock lock(s_instance_mutex); + + // Device shouldn't be torn down since we have the lock. 
+ if (g_gpu_device && g_gpu_device->GetRenderAPI() == RenderAPI::D3D12) + { + GetAdapterAndModeList(&ret, D3D11Device::GetInstance().m_dxgi_factory.Get()); + } + else + { + ComPtr factory = D3DCommon::CreateFactory(false); + if (factory) + GetAdapterAndModeList(&ret, factory.Get()); + } + + return ret; +} + +void D3D11Device::GetAdapterAndModeList(AdapterAndModeList* ret, IDXGIFactory5* factory) +{ + ret->adapter_names = D3DCommon::GetAdapterNames(factory); + ret->fullscreen_modes = D3DCommon::GetFullscreenModes(factory, {}); +} + +GPUDevice::AdapterAndModeList D3D11Device::GetAdapterAndModeList() +{ + AdapterAndModeList ret; + GetAdapterAndModeList(&ret, m_dxgi_factory.Get()); + return ret; +} + +bool D3D11Device::CreateTimestampQueries() +{ + for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++) + { + for (u32 j = 0; j < 3; j++) + { + const CD3D11_QUERY_DESC qdesc((j == 0) ? D3D11_QUERY_TIMESTAMP_DISJOINT : D3D11_QUERY_TIMESTAMP); + const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + m_timestamp_queries = {}; + return false; + } + } + } + + KickTimestampQuery(); + return true; +} + +void D3D11Device::DestroyTimestampQueries() +{ + if (!m_timestamp_queries[0][0]) + return; + + if (m_timestamp_query_started) + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + + m_timestamp_queries = {}; + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; +} + +void D3D11Device::PopTimestampQuery() +{ + while (m_waiting_timestamp_queries > 0) + { + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; + const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint, + sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (disjoint_hr != S_OK) + break; + + if (disjoint.Disjoint) + { + Log_VerbosePrintf("GPU timing disjoint, resetting."); + m_read_timestamp_query = 0; + 
m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; + } + else + { + u64 start = 0, end = 0; + const HRESULT start_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start, + sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH); + const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (start_hr == S_OK && end_hr == S_OK) + { + const float delta = + static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); + m_accumulated_gpu_time += delta; + m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_waiting_timestamp_queries--; + } + } + } + + if (m_timestamp_query_started) + { + m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get()); + m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_timestamp_query_started = false; + m_waiting_timestamp_queries++; + } +} + +void D3D11Device::KickTimestampQuery() +{ + if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) + return; + + m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + m_timestamp_query_started = true; +} + +bool D3D11Device::SetGPUTimingEnabled(bool enabled) +{ + if (m_gpu_timing_enabled == enabled) + return true; + + m_gpu_timing_enabled = enabled; + if (m_gpu_timing_enabled) + { + if (!CreateTimestampQueries()) + return false; + + KickTimestampQuery(); + return true; + } + else + { + DestroyTimestampQueries(); + return true; + } +} + +float D3D11Device::GetAndResetAccumulatedGPUTime() +{ + const float value = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return value; +} + +void 
D3D11Device::PushDebugGroup(const char* fmt, ...) +{ +#ifdef _DEBUG + if (!m_annotation) + return; + + std::va_list ap; + va_start(ap, fmt); + std::string str(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + m_annotation->BeginEvent(StringUtil::UTF8StringToWideString(str).c_str()); +#endif +} + +void D3D11Device::PopDebugGroup() +{ +#ifdef _DEBUG + if (!m_annotation) + return; + + m_annotation->EndEvent(); +#endif +} + +void D3D11Device::InsertDebugMessage(const char* fmt, ...) +{ +#ifdef _DEBUG + if (!m_annotation) + return; + + std::va_list ap; + va_start(ap, fmt); + std::string str(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + m_annotation->SetMarker(StringUtil::UTF8StringToWideString(str).c_str()); +#endif +} + +void D3D11Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) +{ + const auto res = m_vertex_buffer.Map(m_context.Get(), vertex_size, vertex_size * vertex_count); + *map_ptr = res.pointer; + *map_space = res.space_aligned; + *map_base_vertex = res.index_aligned; +} + +void D3D11Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) +{ + m_vertex_buffer.Unmap(m_context.Get(), vertex_size * vertex_count); +} + +void D3D11Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) +{ + const auto res = m_index_buffer.Map(m_context.Get(), sizeof(DrawIndex), sizeof(DrawIndex) * index_count); + *map_ptr = static_cast(res.pointer); + *map_space = res.space_aligned; + *map_base_index = res.index_aligned; +} + +void D3D11Device::UnmapIndexBuffer(u32 used_index_count) +{ + m_index_buffer.Unmap(m_context.Get(), sizeof(DrawIndex) * used_index_count); +} + +void D3D11Device::PushUniformBuffer(const void* data, u32 data_size) +{ + const u32 used_space = Common::AlignUpPow2(data_size, UNIFORM_BUFFER_ALIGNMENT); + const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space); + std::memcpy(res.pointer, 
data, data_size); + m_uniform_buffer.Unmap(m_context.Get(), data_size); + + const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u; + const UINT num_constants = (used_space * UNIFORM_BUFFER_ALIGNMENT) / 16u; + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); +} + +void* D3D11Device::MapUniformBuffer(u32 size) +{ + const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); + const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space); + return res.pointer; +} + +void D3D11Device::UnmapUniformBuffer(u32 size) +{ + const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); + const UINT first_constant = m_uniform_buffer.GetPosition() / 16u; + const UINT num_constants = used_space / 16u; + + m_uniform_buffer.Unmap(m_context.Get(), used_space); + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); +} + +void D3D11Device::SetFramebuffer(GPUFramebuffer* fb) +{ + if (m_current_framebuffer == fb) + return; + + m_current_framebuffer = static_cast(fb); + if (!m_current_framebuffer) + { + m_context->OMSetRenderTargets(0, nullptr, nullptr); + return; + } + + // Make sure textures aren't bound. 
+ if (D3D11Texture* rt = static_cast(fb->GetRT()); rt) + { + const ID3D11ShaderResourceView* srv = rt->GetD3DSRV(); + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == srv) + { + m_current_textures[i] = nullptr; + m_context->PSSetShaderResources(i, 1, &m_current_textures[i]); + } + } + } + if (D3D11Texture* ds = static_cast(fb->GetDS()); ds) + { + const ID3D11ShaderResourceView* srv = ds->GetD3DSRV(); + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == srv) + { + m_current_textures[i] = nullptr; + m_context->PSSetShaderResources(i, 1, &m_current_textures[i]); + } + } + } + + m_current_framebuffer->CommitClear(m_context.Get()); + m_context->OMSetRenderTargets(m_current_framebuffer->GetNumRTVs(), m_current_framebuffer->GetRTVArray(), + m_current_framebuffer->GetDSV()); +} + +void D3D11Device::UnbindFramebuffer(D3D11Framebuffer* fb) +{ + if (m_current_framebuffer != fb) + return; + + m_current_framebuffer = nullptr; + m_context->OMSetRenderTargets(0, nullptr, nullptr); +} + +void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) +{ + ID3D11ShaderResourceView* T = texture ? static_cast(texture)->GetD3DSRV() : nullptr; + ID3D11SamplerState* S = sampler ? static_cast(sampler)->GetSamplerState() : nullptr; + + // Runtime will null these if we don't... + DebugAssert(!m_current_framebuffer || !texture || + (m_current_framebuffer->GetRT() != texture && m_current_framebuffer->GetDS() != texture)); + + if (m_current_textures[slot] != T) + { + m_current_textures[slot] = T; + m_context->PSSetShaderResources(slot, 1, &T); + } + if (m_current_samplers[slot] != S) + { + m_current_samplers[slot] = S; + m_context->PSSetSamplers(slot, 1, &S); + } +} + +void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) +{ + ID3D11ShaderResourceView* B = buffer ? 
static_cast(buffer)->GetSRV() : nullptr; + if (m_current_textures[slot] != B) + { + m_current_textures[slot] = B; + m_context->PSSetShaderResources(slot, 1, &B); + } +} + +void D3D11Device::UnbindTexture(D3D11Texture* tex) +{ + if (const ID3D11ShaderResourceView* srv = tex->GetD3DSRV(); srv) + { + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == srv) + { + m_current_textures[i] = nullptr; + m_context->PSSetShaderResources(i, 1, &m_current_textures[i]); + } + } + } + + if (m_current_framebuffer && m_current_framebuffer->GetRT() == tex) + SetFramebuffer(nullptr); +} + +void D3D11Device::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + const CD3D11_VIEWPORT vp(static_cast(x), static_cast(y), static_cast(width), + static_cast(height), 0.0f, 1.0f); + m_context->RSSetViewports(1, &vp); +} + +void D3D11Device::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + const CD3D11_RECT rc(x, y, x + width, y + height); + m_context->RSSetScissorRects(1, &rc); +} + +void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) +{ + m_context->Draw(vertex_count, base_vertex); +} + +void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) +{ + m_context->DrawIndexed(index_count, base_index, base_vertex); +} diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h new file mode 100644 index 000000000..4fac4c379 --- /dev/null +++ b/src/util/d3d11_device.h @@ -0,0 +1,205 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "d3d11_stream_buffer.h" +#include "gpu_device.h" + +#include "common/windows_headers.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +class D3D11Framebuffer; +class D3D11Pipeline; +class D3D11Shader; +class D3D11Texture; +class D3D11TextureBuffer; + +class D3D11Device final : public GPUDevice +{ +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + D3D11Device(); + 
~D3D11Device(); + + ALWAYS_INLINE static D3D11Device& GetInstance() { return *static_cast(g_gpu_device.get()); } + ALWAYS_INLINE static ID3D11Device* GetD3DDevice() { return GetInstance().m_device.Get(); } + ALWAYS_INLINE static ID3D11DeviceContext1* GetD3DContext() { return GetInstance().m_context.Get(); } + + RenderAPI GetRenderAPI() const override; + + bool HasSurface() const override; + + bool UpdateWindow() override; + void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; + bool SupportsExclusiveFullscreen() const override; + AdapterAndModeList GetAdapterAndModeList() override; + void DestroySurface() override; + + std::string GetDriverInfo() const override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + + bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; + void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 width, u32 height) override; + void ClearRenderTarget(GPUTexture* t, u32 c) override; + void ClearDepth(GPUTexture* t, float d) override; + void InvalidateRenderTarget(GPUTexture* t) override; + + std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; + + std::unique_ptr 
CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, DynamicHeapArray* binary) override; + std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size); + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + + bool GetHostRefreshRate(float* refresh_rate) override; + + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + + void SetVSync(bool enabled) override; + + bool BeginPresent(bool skip_present) override; + void EndPresent() override; + + void UnbindFramebuffer(D3D11Framebuffer* fb); + void UnbindPipeline(D3D11Pipeline* pl); + void UnbindTexture(D3D11Texture* tex); + + static AdapterAndModeList StaticGetAdapterAndModeList(); + +protected: + bool 
CreateDevice(const std::string_view& adapter, bool threaded_presentation) override; + void DestroyDevice() override; + +private: + using RasterizationStateMap = std::unordered_map>; + using DepthStateMap = std::unordered_map>; + using BlendStateMap = std::unordered_map>; + using InputLayoutMap = + std::unordered_map, GPUPipeline::InputLayoutHash>; + + static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; + static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; + static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; + + static void GetAdapterAndModeList(AdapterAndModeList* ret, IDXGIFactory5* factory); + + void SetFeatures(); + + bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); + void DestroyStagingBuffer(); + + bool CreateSwapChain(); + bool CreateSwapChainRTV(); + void DestroySwapChain(); + + bool CreateBuffers(); + void DestroyBuffers(); + + ComPtr GetRasterizationState(const GPUPipeline::RasterizationState& rs); + ComPtr GetDepthState(const GPUPipeline::DepthState& ds); + ComPtr GetBlendState(const GPUPipeline::BlendState& bs); + ComPtr GetInputLayout(const GPUPipeline::InputLayout& il, const D3D11Shader* vs); + + bool CreateTimestampQueries(); + void DestroyTimestampQueries(); + void PopTimestampQuery(); + void KickTimestampQuery(); + + ComPtr m_device; + ComPtr m_context; + ComPtr m_annotation; + + ComPtr m_dxgi_factory; + ComPtr m_swap_chain; + ComPtr m_swap_chain_rtv; + + RasterizationStateMap m_rasterization_states; + DepthStateMap m_depth_states; + BlendStateMap m_blend_states; + InputLayoutMap m_input_layouts; + + ComPtr m_readback_staging_texture; + DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; + u32 m_readback_staging_texture_width = 0; + u32 m_readback_staging_texture_height = 0; + + bool m_allow_tearing_supported = false; + bool m_using_flip_model_swap_chain = true; + bool 
m_using_allow_tearing = false; + bool m_is_exclusive_fullscreen = false; + + D3D11StreamBuffer m_vertex_buffer; + D3D11StreamBuffer m_index_buffer; + D3D11StreamBuffer m_uniform_buffer; + + D3D11Framebuffer* m_current_framebuffer = nullptr; + D3D11Pipeline* m_current_pipeline = nullptr; + + ID3D11InputLayout* m_current_input_layout = nullptr; + ID3D11VertexShader* m_current_vertex_shader = nullptr; + ID3D11PixelShader* m_current_pixel_shader = nullptr; + ID3D11RasterizerState* m_current_rasterizer_state = nullptr; + ID3D11DepthStencilState* m_current_depth_state = nullptr; + ID3D11BlendState* m_current_blend_state = nullptr; + D3D_PRIMITIVE_TOPOLOGY m_current_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + u32 m_current_vertex_stride = 0; + u32 m_current_blend_factor = 0; + + std::array m_current_textures = {}; + std::array m_current_samplers = {}; + + std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; + u8 m_read_timestamp_query = 0; + u8 m_write_timestamp_query = 0; + u8 m_waiting_timestamp_queries = 0; + bool m_timestamp_query_started = false; + float m_accumulated_gpu_time = 0.0f; +}; + +void SetD3DDebugObjectName(ID3D11DeviceChild* obj, const std::string_view& name); diff --git a/src/util/d3d11_host_display.cpp b/src/util/d3d11_host_display.cpp deleted file mode 100644 index e9bff79c6..000000000 --- a/src/util/d3d11_host_display.cpp +++ /dev/null @@ -1,1194 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "d3d11_host_display.h" -#include "common/assert.h" -#include "common/d3d11/shader_cache.h" -#include "common/d3d11/shader_compiler.h" -#include "common/log.h" -#include "common/string_util.h" -#include "core/common_host.h" -#include "core/host_settings.h" -#include "core/settings.h" -#include "core/shader_cache_version.h" -#include "display_ps.hlsl.h" -#include "display_ps_alpha.hlsl.h" -#include "display_vs.hlsl.h" -#include "imgui.h" -#include 
"imgui_impl_dx11.h" -#include "postprocessing_shadergen.h" -#include -#include -Log_SetChannel(D3D11HostDisplay); - -#pragma comment(lib, "d3d11.lib") -#pragma comment(lib, "dxgi.lib") - -static constexpr std::array s_clear_color = {}; - -D3D11HostDisplay::D3D11HostDisplay() = default; - -D3D11HostDisplay::~D3D11HostDisplay() -{ - DestroyStagingBuffer(); - DestroyResources(); - DestroySurface(); - m_context.Reset(); - m_device.Reset(); -} - -RenderAPI D3D11HostDisplay::GetRenderAPI() const -{ - return RenderAPI::D3D11; -} - -void* D3D11HostDisplay::GetDevice() const -{ - return m_device.Get(); -} - -void* D3D11HostDisplay::GetContext() const -{ - return m_context.Get(); -} - -bool D3D11HostDisplay::HasDevice() const -{ - return static_cast(m_device); -} - -bool D3D11HostDisplay::HasSurface() const -{ - return static_cast(m_swap_chain); -} - -std::unique_ptr D3D11HostDisplay::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, - u32 data_stride, bool dynamic /* = false */) -{ - std::unique_ptr tex(std::make_unique()); - if (!tex->Create(m_device.Get(), width, height, layers, levels, samples, format, D3D11_BIND_SHADER_RESOURCE, data, - data_stride, dynamic)) - { - tex.reset(); - } - - return tex; -} - -bool D3D11HostDisplay::BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) -{ - D3D11::Texture* tex = static_cast(texture); - if (!tex->IsDynamic() || tex->GetWidth() != width || tex->GetHeight() != height) - return false; - - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(tex->GetD3DTexture(), 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map pixels texture failed: %08X", hr); - return false; - } - - *out_buffer = sr.pData; - *out_pitch = sr.RowPitch; - return true; -} - -void D3D11HostDisplay::EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) -{ - D3D11::Texture* tex = 
static_cast(texture); - m_context->Unmap(tex->GetD3DTexture(), 0); -} - -bool D3D11HostDisplay::UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, - u32 pitch) -{ - D3D11::Texture* tex = static_cast(texture); - if (tex->IsDynamic()) - return HostDisplay::UpdateTexture(texture, x, y, width, height, data, pitch); - - const CD3D11_BOX dst_box(x, y, 0, x + width, y + height, 1); - m_context->UpdateSubresource(tex->GetD3DTexture(), 0, &dst_box, data, pitch, pitch * height); - return true; -} - -bool D3D11HostDisplay::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - const D3D11::Texture* tex = static_cast(texture); - if (!CheckStagingBufferSize(width, height, tex->GetDXGIFormat())) - return false; - - const CD3D11_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), - static_cast(y + height), 1); - m_context->CopySubresourceRegion(m_readback_staging_texture.Get(), 0, 0, 0, 0, tex->GetD3DTexture(), 0, &box); - - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(m_readback_staging_texture.Get(), 0, D3D11_MAP_READ, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); - return false; - } - - const u32 copy_size = tex->GetPixelSize() * width; - StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height); - m_context->Unmap(m_readback_staging_texture.Get(), 0); - return true; -} - -bool D3D11HostDisplay::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format) -{ - if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height && - m_readback_staging_texture_format == format) - return true; - - DestroyStagingBuffer(); - - CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ); - HRESULT hr = m_device->CreateTexture2D(&desc, nullptr, m_readback_staging_texture.ReleaseAndGetAddressOf()); - if 
(FAILED(hr)) - { - Log_ErrorPrintf("CreateTexture2D() failed with HRESULT %08X", hr); - return false; - } - - return true; -} - -void D3D11HostDisplay::DestroyStagingBuffer() -{ - m_readback_staging_texture.Reset(); - m_readback_staging_texture_width = 0; - m_readback_staging_texture_height = 0; - m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; -} - -bool D3D11HostDisplay::SupportsTextureFormat(GPUTexture::Format format) const -{ - const DXGI_FORMAT dfmt = D3D11::Texture::GetDXGIFormat(format); - if (dfmt == DXGI_FORMAT_UNKNOWN) - return false; - - UINT support = 0; - const UINT required = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; - return (SUCCEEDED(m_device->CheckFormatSupport(dfmt, &support)) && ((support & required) == required)); -} - -bool D3D11HostDisplay::GetHostRefreshRate(float* refresh_rate) -{ - if (m_swap_chain && IsFullscreen()) - { - DXGI_SWAP_CHAIN_DESC desc; - if (SUCCEEDED(m_swap_chain->GetDesc(&desc)) && desc.BufferDesc.RefreshRate.Numerator > 0 && - desc.BufferDesc.RefreshRate.Denominator > 0) - { - Log_InfoPrintf("using fs rr: %u %u", desc.BufferDesc.RefreshRate.Numerator, - desc.BufferDesc.RefreshRate.Denominator); - *refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / - static_cast(desc.BufferDesc.RefreshRate.Denominator); - return true; - } - } - - return HostDisplay::GetHostRefreshRate(refresh_rate); -} - -void D3D11HostDisplay::SetVSync(bool enabled) -{ - m_vsync_enabled = enabled; -} - -bool D3D11HostDisplay::CreateDevice(const WindowInfo& wi, bool vsync) -{ - UINT create_flags = 0; - if (g_settings.gpu_use_debug_device) - create_flags |= D3D11_CREATE_DEVICE_DEBUG; - - ComPtr temp_dxgi_factory; - HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(temp_dxgi_factory.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create DXGI factory: 0x%08X", hr); - return false; - } - - u32 adapter_index; - if (!g_settings.gpu_adapter.empty()) - { - AdapterAndModeList 
adapter_info(GetAdapterAndModeList(temp_dxgi_factory.Get())); - for (adapter_index = 0; adapter_index < static_cast(adapter_info.adapter_names.size()); adapter_index++) - { - if (g_settings.gpu_adapter == adapter_info.adapter_names[adapter_index]) - break; - } - if (adapter_index == static_cast(adapter_info.adapter_names.size())) - { - Log_WarningPrintf("Could not find adapter '%s', using first (%s)", g_settings.gpu_adapter.c_str(), - adapter_info.adapter_names[0].c_str()); - adapter_index = 0; - } - } - else - { - Log_InfoPrintf("No adapter selected, using first."); - adapter_index = 0; - } - - ComPtr dxgi_adapter; - hr = temp_dxgi_factory->EnumAdapters(adapter_index, dxgi_adapter.GetAddressOf()); - if (FAILED(hr)) - Log_WarningPrintf("Failed to enumerate adapter %u, using default", adapter_index); - - static constexpr std::array requested_feature_levels = { - {D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}}; - - hr = - D3D11CreateDevice(dxgi_adapter.Get(), dxgi_adapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE, nullptr, - create_flags, requested_feature_levels.data(), static_cast(requested_feature_levels.size()), - D3D11_SDK_VERSION, m_device.GetAddressOf(), nullptr, m_context.GetAddressOf()); - - // we re-grab these later, see below - dxgi_adapter.Reset(); - temp_dxgi_factory.Reset(); - - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create D3D device: 0x%08X", hr); - return false; - } - - if (g_settings.gpu_use_debug_device && IsDebuggerPresent()) - { - ComPtr info; - hr = m_device.As(&info); - if (SUCCEEDED(hr)) - { - info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_ERROR, TRUE); - info->SetBreakOnSeverity(D3D11_MESSAGE_SEVERITY_WARNING, TRUE); - } - } - - // we need the specific factory for the device, otherwise MakeWindowAssociation() is flaky. 
- ComPtr dxgi_device; - if (FAILED(m_device.As(&dxgi_device)) || FAILED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.GetAddressOf()))) || - FAILED(dxgi_adapter->GetParent(IID_PPV_ARGS(m_dxgi_factory.GetAddressOf())))) - { - Log_WarningPrint("Failed to get parent adapter/device/factory"); - return false; - } - ComPtr dxgi_device1; - if (SUCCEEDED(dxgi_device.As(&dxgi_device1))) - dxgi_device1->SetMaximumFrameLatency(1); - - DXGI_ADAPTER_DESC adapter_desc; - if (SUCCEEDED(dxgi_adapter->GetDesc(&adapter_desc))) - { - char adapter_name_buffer[128]; - const int name_length = - WideCharToMultiByte(CP_UTF8, 0, adapter_desc.Description, static_cast(std::wcslen(adapter_desc.Description)), - adapter_name_buffer, countof(adapter_name_buffer), 0, nullptr); - if (name_length >= 0) - { - adapter_name_buffer[name_length] = 0; - Log_InfoPrintf("D3D Adapter: %s", adapter_name_buffer); - } - } - - m_allow_tearing_supported = false; - ComPtr dxgi_factory5; - hr = m_dxgi_factory.As(&dxgi_factory5); - if (SUCCEEDED(hr)) - { - BOOL allow_tearing_supported = false; - hr = dxgi_factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, - sizeof(allow_tearing_supported)); - if (SUCCEEDED(hr)) - m_allow_tearing_supported = (allow_tearing_supported == TRUE); - } - - m_window_info = wi; - m_vsync_enabled = vsync; - - if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain(nullptr)) - { - m_window_info = {}; - return false; - } - - return true; -} - -bool D3D11HostDisplay::SetupDevice() -{ - if (!CreateResources()) - return false; - - return true; -} - -bool D3D11HostDisplay::MakeCurrent() -{ - return true; -} - -bool D3D11HostDisplay::DoneCurrent() -{ - return true; -} - -bool D3D11HostDisplay::CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode) -{ - HRESULT hr; - - if (m_window_info.type != WindowInfo::Type::Win32) - return false; - - m_using_flip_model_swap_chain = fullscreen_mode || !Host::GetBoolSettingValue("Display", 
"UseBlitSwapChain", false); - - const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle); - RECT client_rc{}; - GetClientRect(window_hwnd, &client_rc); - const u32 width = static_cast(client_rc.right - client_rc.left); - const u32 height = static_cast(client_rc.bottom - client_rc.top); - - DXGI_SWAP_CHAIN_DESC swap_chain_desc = {}; - swap_chain_desc.BufferDesc.Width = width; - swap_chain_desc.BufferDesc.Height = height; - swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - swap_chain_desc.SampleDesc.Count = 1; - swap_chain_desc.BufferCount = 2; - swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swap_chain_desc.OutputWindow = window_hwnd; - swap_chain_desc.Windowed = TRUE; - swap_chain_desc.SwapEffect = m_using_flip_model_swap_chain ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_DISCARD; - - m_using_allow_tearing = (m_allow_tearing_supported && m_using_flip_model_swap_chain && !fullscreen_mode); - if (m_using_allow_tearing) - swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; - - if (fullscreen_mode) - { - swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; - swap_chain_desc.Windowed = FALSE; - swap_chain_desc.BufferDesc = *fullscreen_mode; - } - - Log_InfoPrintf("Creating a %dx%d %s %s swap chain", swap_chain_desc.BufferDesc.Width, - swap_chain_desc.BufferDesc.Height, m_using_flip_model_swap_chain ? "flip-discard" : "discard", - swap_chain_desc.Windowed ? 
"windowed" : "full-screen"); - - hr = m_dxgi_factory->CreateSwapChain(m_device.Get(), &swap_chain_desc, m_swap_chain.GetAddressOf()); - if (FAILED(hr) && m_using_flip_model_swap_chain) - { - Log_WarningPrintf("Failed to create a flip-discard swap chain, trying discard."); - swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; - swap_chain_desc.Flags = 0; - m_using_flip_model_swap_chain = false; - m_using_allow_tearing = false; - - hr = m_dxgi_factory->CreateSwapChain(m_device.Get(), &swap_chain_desc, m_swap_chain.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateSwapChain failed: 0x%08X", hr); - return false; - } - } - - ComPtr dxgi_factory; - hr = m_swap_chain->GetParent(IID_PPV_ARGS(dxgi_factory.GetAddressOf())); - if (SUCCEEDED(hr)) - { - hr = dxgi_factory->MakeWindowAssociation(swap_chain_desc.OutputWindow, DXGI_MWA_NO_WINDOW_CHANGES); - if (FAILED(hr)) - Log_WarningPrintf("MakeWindowAssociation() to disable ALT+ENTER failed"); - } - - return CreateSwapChainRTV(); -} - -bool D3D11HostDisplay::CreateSwapChainRTV() -{ - ComPtr backbuffer; - HRESULT hr = m_swap_chain->GetBuffer(0, IID_PPV_ARGS(backbuffer.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("GetBuffer for RTV failed: 0x%08X", hr); - return false; - } - - D3D11_TEXTURE2D_DESC backbuffer_desc; - backbuffer->GetDesc(&backbuffer_desc); - - CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(D3D11_RTV_DIMENSION_TEXTURE2D, backbuffer_desc.Format, 0, 0, - backbuffer_desc.ArraySize); - hr = m_device->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, m_swap_chain_rtv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateRenderTargetView for swap chain failed: 0x%08X", hr); - return false; - } - - m_window_info.surface_width = backbuffer_desc.Width; - m_window_info.surface_height = backbuffer_desc.Height; - Log_InfoPrintf("Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height); - - if (m_window_info.type == WindowInfo::Type::Win32) - { - BOOL 
fullscreen = FALSE; - DXGI_SWAP_CHAIN_DESC desc; - if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && - SUCCEEDED(m_swap_chain->GetDesc(&desc))) - { - m_window_info.surface_refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / - static_cast(desc.BufferDesc.RefreshRate.Denominator); - } - else - { - m_window_info.surface_refresh_rate = 0.0f; - } - } - - return true; -} - -bool D3D11HostDisplay::ChangeWindow(const WindowInfo& new_wi) -{ - DestroySurface(); - - m_window_info = new_wi; - return CreateSwapChain(nullptr); -} - -void D3D11HostDisplay::DestroySurface() -{ - m_window_info.SetSurfaceless(); - if (IsFullscreen()) - SetFullscreen(false, 0, 0, 0.0f); - - m_swap_chain_rtv.Reset(); - m_swap_chain.Reset(); -} - -void D3D11HostDisplay::ResizeWindow(s32 new_window_width, s32 new_window_height) -{ - if (!m_swap_chain) - return; - - m_swap_chain_rtv.Reset(); - - HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, - m_using_allow_tearing ? 
DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); - if (FAILED(hr)) - Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); - - if (!CreateSwapChainRTV()) - Panic("Failed to recreate swap chain RTV after resize"); -} - -bool D3D11HostDisplay::SupportsFullscreen() const -{ - return true; -} - -bool D3D11HostDisplay::IsFullscreen() -{ - BOOL is_fullscreen = FALSE; - return (m_swap_chain && SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen); -} - -bool D3D11HostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) -{ - if (!m_swap_chain) - return false; - - BOOL is_fullscreen = FALSE; - HRESULT hr = m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr); - if (!fullscreen) - { - // leaving fullscreen - if (is_fullscreen) - return SUCCEEDED(m_swap_chain->SetFullscreenState(FALSE, nullptr)); - else - return true; - } - - IDXGIOutput* output; - if (FAILED(hr = m_swap_chain->GetContainingOutput(&output))) - return false; - - DXGI_SWAP_CHAIN_DESC current_desc; - hr = m_swap_chain->GetDesc(¤t_desc); - if (FAILED(hr)) - return false; - - DXGI_MODE_DESC new_mode = current_desc.BufferDesc; - new_mode.Width = width; - new_mode.Height = height; - new_mode.RefreshRate.Numerator = static_cast(std::floor(refresh_rate * 1000.0f)); - new_mode.RefreshRate.Denominator = 1000u; - - DXGI_MODE_DESC closest_mode; - if (FAILED(hr = output->FindClosestMatchingMode(&new_mode, &closest_mode, nullptr)) || - new_mode.Format != current_desc.BufferDesc.Format) - { - Log_ErrorPrintf("Failed to find closest matching mode, hr=%08X", hr); - return false; - } - - if (new_mode.Width == current_desc.BufferDesc.Width && new_mode.Height == current_desc.BufferDesc.Height && - new_mode.RefreshRate.Numerator == current_desc.BufferDesc.RefreshRate.Numerator && - new_mode.RefreshRate.Denominator == current_desc.BufferDesc.RefreshRate.Denominator) - { - Log_InfoPrintf("Fullscreen mode already set"); - return true; - } - - m_swap_chain_rtv.Reset(); - 
m_swap_chain.Reset(); - - if (!CreateSwapChain(&closest_mode)) - { - Log_ErrorPrintf("Failed to create a fullscreen swap chain"); - if (!CreateSwapChain(nullptr)) - Panic("Failed to recreate windowed swap chain"); - - return false; - } - - return true; -} - -bool D3D11HostDisplay::CreateResources() -{ - HRESULT hr; - - m_display_vertex_shader = - D3D11::ShaderCompiler::CreateVertexShader(m_device.Get(), s_display_vs_bytecode, sizeof(s_display_vs_bytecode)); - m_display_pixel_shader = - D3D11::ShaderCompiler::CreatePixelShader(m_device.Get(), s_display_ps_bytecode, sizeof(s_display_ps_bytecode)); - m_display_alpha_pixel_shader = D3D11::ShaderCompiler::CreatePixelShader(m_device.Get(), s_display_ps_alpha_bytecode, - sizeof(s_display_ps_alpha_bytecode)); - if (!m_display_vertex_shader || !m_display_pixel_shader || !m_display_alpha_pixel_shader) - return false; - - if (!m_display_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, DISPLAY_UNIFORM_BUFFER_SIZE)) - return false; - - CD3D11_RASTERIZER_DESC rasterizer_desc = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rasterizer_desc.CullMode = D3D11_CULL_NONE; - hr = m_device->CreateRasterizerState(&rasterizer_desc, m_display_rasterizer_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_DEPTH_STENCIL_DESC depth_stencil_desc = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - depth_stencil_desc.DepthEnable = FALSE; - depth_stencil_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - hr = m_device->CreateDepthStencilState(&depth_stencil_desc, m_display_depth_stencil_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_BLEND_DESC blend_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = m_device->CreateBlendState(&blend_desc, m_display_blend_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - blend_desc.RenderTarget[0] = {TRUE, - D3D11_BLEND_SRC_ALPHA, - D3D11_BLEND_INV_SRC_ALPHA, - D3D11_BLEND_OP_ADD, - D3D11_BLEND_ONE, - D3D11_BLEND_ZERO, - D3D11_BLEND_OP_ADD, - 
D3D11_COLOR_WRITE_ENABLE_ALL}; - hr = m_device->CreateBlendState(&blend_desc, m_software_cursor_blend_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler.GetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; - hr = m_device->CreateSamplerState(&sampler_desc, m_linear_sampler.GetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; - sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; - sampler_desc.BorderColor[0] = 0.0f; - sampler_desc.BorderColor[1] = 0.0f; - sampler_desc.BorderColor[2] = 0.0f; - sampler_desc.BorderColor[3] = 1.0f; - hr = m_device->CreateSamplerState(&sampler_desc, m_border_sampler.GetAddressOf()); - if (FAILED(hr)) - return false; - - return true; -} - -void D3D11HostDisplay::DestroyResources() -{ - HostDisplay::DestroyResources(); - - m_post_processing_chain.ClearStages(); - m_post_processing_input_texture.Destroy(); - m_post_processing_stages.clear(); - - m_display_uniform_buffer.Release(); - m_border_sampler.Reset(); - m_linear_sampler.Reset(); - m_point_sampler.Reset(); - m_display_alpha_pixel_shader.Reset(); - m_display_pixel_shader.Reset(); - m_display_vertex_shader.Reset(); - m_display_blend_state.Reset(); - m_display_depth_stencil_state.Reset(); - m_display_rasterizer_state.Reset(); -} - -bool D3D11HostDisplay::CreateImGuiContext() -{ - return ImGui_ImplDX11_Init(m_device.Get(), m_context.Get()); -} - -void D3D11HostDisplay::DestroyImGuiContext() -{ - ImGui_ImplDX11_Shutdown(); -} - -bool D3D11HostDisplay::UpdateImGuiFontTexture() -{ - ImGui_ImplDX11_CreateFontsTexture(); - return true; -} - -bool D3D11HostDisplay::Render(bool skip_present) -{ - if (skip_present || 
!m_swap_chain) - { - if (ImGui::GetCurrentContext()) - ImGui::Render(); - - return false; - } - - // When using vsync, the time here seems to include the time for the buffer to become available. - // This blows our our GPU usage number considerably, so read the timestamp before the final blit - // in this configuration. It does reduce accuracy a little, but better than seeing 100% all of - // the time, when it's more like a couple of percent. - if (m_vsync_enabled && m_gpu_timing_enabled) - PopTimestampQuery(); - - RenderDisplay(); - - if (ImGui::GetCurrentContext()) - RenderImGui(); - - RenderSoftwareCursor(); - - if (!m_vsync_enabled && m_gpu_timing_enabled) - PopTimestampQuery(); - - if (!m_vsync_enabled && m_using_allow_tearing) - m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); - else - m_swap_chain->Present(BoolToUInt32(m_vsync_enabled), 0); - - if (m_gpu_timing_enabled) - KickTimestampQuery(); - - return true; -} - -bool D3D11HostDisplay::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; - - D3D11::Texture render_texture; - if (!render_texture.Create(m_device.Get(), width, height, 1, 1, 1, hdformat, D3D11_BIND_RENDER_TARGET)) - return false; - - static constexpr std::array clear_color = {}; - m_context->ClearRenderTargetView(render_texture.GetD3DRTV(), clear_color.data()); - m_context->OMSetRenderTargets(1, render_texture.GetD3DRTVArray(), nullptr); - - if (HasDisplayTexture()) - { - if (!m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(render_texture.GetD3DRTV(), draw_rect.left, draw_rect.top, draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, width, height); - } - else - { - RenderDisplay(draw_rect.left, 
draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); - } - } - - m_context->OMSetRenderTargets(0, nullptr, nullptr); - - const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; - out_pixels->resize(width * height); - if (!DownloadTexture(&render_texture, 0, 0, width, height, out_pixels->data(), stride)) - return false; - - *out_stride = stride; - *out_format = hdformat; - return true; -} - -void D3D11HostDisplay::RenderImGui() -{ - ImGui::Render(); - ImGui_ImplDX11_RenderDrawData(ImGui::GetDrawData()); -} - -void D3D11HostDisplay::RenderDisplay() -{ - const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(m_swap_chain_rtv.Get(), left, top, width, height, - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - GetWindowWidth(), GetWindowHeight()); - return; - } - - m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); - m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); - - if (!HasDisplayTexture()) - return; - - RenderDisplay(left, top, width, height, static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - IsUsingLinearFiltering()); -} - -void D3D11HostDisplay::RenderDisplay(s32 left, s32 top, s32 width, s32 height, D3D11::Texture* texture, - s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, bool linear_filter) -{ - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->VSSetShader(m_display_vertex_shader.Get(), nullptr, 0); - 
m_context->PSSetShader(m_display_pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, texture->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, linear_filter ? m_linear_sampler.GetAddressOf() : m_point_sampler.GetAddressOf()); - - const bool linear = IsUsingLinearFiltering(); - const float position_adjust = linear ? 0.5f : 0.0f; - const float size_adjust = linear ? 1.0f : 0.0f; - const float uniforms[4] = { - (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_y) + position_adjust) / static_cast(texture->GetHeight()), - (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_height) - size_adjust) / static_cast(texture->GetHeight())}; - const auto map = m_display_uniform_buffer.Map(m_context.Get(), m_display_uniform_buffer.GetSize(), sizeof(uniforms)); - std::memcpy(map.pointer, uniforms, sizeof(uniforms)); - m_display_uniform_buffer.Unmap(m_context.Get(), sizeof(uniforms)); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - - const CD3D11_VIEWPORT vp(static_cast(left), static_cast(top), static_cast(width), - static_cast(height)); - m_context->RSSetViewports(1, &vp); - m_context->RSSetState(m_display_rasterizer_state.Get()); - m_context->OMSetDepthStencilState(m_display_depth_stencil_state.Get(), 0); - m_context->OMSetBlendState(m_display_blend_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->Draw(3, 0); -} - -void D3D11HostDisplay::RenderSoftwareCursor() -{ - if (!HasSoftwareCursor()) - return; - - const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); - RenderSoftwareCursor(left, top, width, height, m_cursor_texture.get()); -} - -void D3D11HostDisplay::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture_handle) -{ - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - 
m_context->VSSetShader(m_display_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(m_display_alpha_pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, static_cast(texture_handle)->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_linear_sampler.GetAddressOf()); - - const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; - const auto map = m_display_uniform_buffer.Map(m_context.Get(), m_display_uniform_buffer.GetSize(), sizeof(uniforms)); - std::memcpy(map.pointer, uniforms, sizeof(uniforms)); - m_display_uniform_buffer.Unmap(m_context.Get(), sizeof(uniforms)); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - - const CD3D11_VIEWPORT vp(static_cast(left), static_cast(top), static_cast(width), - static_cast(height)); - m_context->RSSetViewports(1, &vp); - m_context->RSSetState(m_display_rasterizer_state.Get()); - m_context->OMSetDepthStencilState(m_display_depth_stencil_state.Get(), 0); - m_context->OMSetBlendState(m_software_cursor_blend_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->Draw(3, 0); -} - -HostDisplay::AdapterAndModeList D3D11HostDisplay::StaticGetAdapterAndModeList() -{ - ComPtr dxgi_factory; - HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(dxgi_factory.GetAddressOf())); - if (FAILED(hr)) - return {}; - - return GetAdapterAndModeList(dxgi_factory.Get()); -} - -HostDisplay::AdapterAndModeList D3D11HostDisplay::GetAdapterAndModeList(IDXGIFactory* dxgi_factory) -{ - AdapterAndModeList adapter_info; - ComPtr current_adapter; - while (SUCCEEDED(dxgi_factory->EnumAdapters(static_cast(adapter_info.adapter_names.size()), - current_adapter.ReleaseAndGetAddressOf()))) - { - DXGI_ADAPTER_DESC adapter_desc; - std::string adapter_name; - if (SUCCEEDED(current_adapter->GetDesc(&adapter_desc))) - { - char adapter_name_buffer[128]; - const int name_length = WideCharToMultiByte(CP_UTF8, 0, adapter_desc.Description, - static_cast(std::wcslen(adapter_desc.Description)), - adapter_name_buffer, 
countof(adapter_name_buffer), 0, nullptr); - if (name_length >= 0) - adapter_name.assign(adapter_name_buffer, static_cast(name_length)); - else - adapter_name.assign("(Unknown)"); - } - else - { - adapter_name.assign("(Unknown)"); - } - - if (adapter_info.fullscreen_modes.empty()) - { - ComPtr output; - if (SUCCEEDED(current_adapter->EnumOutputs(0, &output))) - { - UINT num_modes = 0; - if (SUCCEEDED(output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, nullptr))) - { - std::vector modes(num_modes); - if (SUCCEEDED(output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, modes.data()))) - { - for (const DXGI_MODE_DESC& mode : modes) - { - adapter_info.fullscreen_modes.push_back(GetFullscreenModeString( - mode.Width, mode.Height, - static_cast(mode.RefreshRate.Numerator) / static_cast(mode.RefreshRate.Denominator))); - } - } - } - } - } - - // handle duplicate adapter names - if (std::any_of(adapter_info.adapter_names.begin(), adapter_info.adapter_names.end(), - [&adapter_name](const std::string& other) { return (adapter_name == other); })) - { - std::string original_adapter_name = std::move(adapter_name); - - u32 current_extra = 2; - do - { - adapter_name = StringUtil::StdStringFromFormat("%s (%u)", original_adapter_name.c_str(), current_extra); - current_extra++; - } while (std::any_of(adapter_info.adapter_names.begin(), adapter_info.adapter_names.end(), - [&adapter_name](const std::string& other) { return (adapter_name == other); })); - } - - adapter_info.adapter_names.push_back(std::move(adapter_name)); - } - - return adapter_info; -} - -HostDisplay::AdapterAndModeList D3D11HostDisplay::GetAdapterAndModeList() -{ - return GetAdapterAndModeList(m_dxgi_factory.Get()); -} - -bool D3D11HostDisplay::SetPostProcessingChain(const std::string_view& config) -{ - if (config.empty()) - { - m_post_processing_input_texture.Destroy(); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return true; - } - - if 
(!m_post_processing_chain.CreateFromString(config)) - return false; - - m_post_processing_stages.clear(); - - D3D11::ShaderCache shader_cache; - shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION, - g_settings.gpu_use_debug_device); - - FrontendCommon::PostProcessingShaderGen shadergen(RenderAPI::D3D11, true); - u32 max_ubo_size = 0; - - for (u32 i = 0; i < m_post_processing_chain.GetStageCount(); i++) - { - const FrontendCommon::PostProcessingShader& shader = m_post_processing_chain.GetShaderStage(i); - const std::string vs = shadergen.GeneratePostProcessingVertexShader(shader); - const std::string ps = shadergen.GeneratePostProcessingFragmentShader(shader); - - PostProcessingStage stage; - stage.uniforms_size = shader.GetUniformsSize(); - stage.vertex_shader = shader_cache.GetVertexShader(m_device.Get(), vs); - stage.pixel_shader = shader_cache.GetPixelShader(m_device.Get(), ps); - if (!stage.vertex_shader || !stage.pixel_shader) - { - Log_ErrorPrintf("Failed to compile one or more post-processing shaders, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - max_ubo_size = std::max(max_ubo_size, stage.uniforms_size); - m_post_processing_stages.push_back(std::move(stage)); - } - - if (m_display_uniform_buffer.GetSize() < max_ubo_size && - !m_display_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, max_ubo_size)) - { - Log_ErrorPrintf("Failed to allocate %u byte constant buffer for postprocessing", max_ubo_size); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - m_post_processing_timer.Reset(); - return true; -} - -bool D3D11HostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 target_height) -{ - DebugAssert(!m_post_processing_stages.empty()); - - const GPUTexture::Format format = GPUTexture::Format::RGBA8; - const u32 bind_flags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; - 
- if (m_post_processing_input_texture.GetWidth() != target_width || - m_post_processing_input_texture.GetHeight() != target_height) - { - if (!m_post_processing_input_texture.Create(m_device.Get(), target_width, target_height, 1, 1, 1, format, - bind_flags)) - { - return false; - } - } - - const u32 target_count = (static_cast(m_post_processing_stages.size()) - 1); - for (u32 i = 0; i < target_count; i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) - { - if (!pps.output_texture.Create(m_device.Get(), target_width, target_height, 1, 1, 1, format, bind_flags)) - return false; - } - } - - return true; -} - -void D3D11HostDisplay::ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, D3D11::Texture* texture, - s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, u32 target_width, u32 target_height) -{ - if (!CheckPostProcessingRenderTargets(target_width, target_height)) - { - RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - return; - } - - // downsample/upsample - use same viewport for remainder - m_context->ClearRenderTargetView(m_post_processing_input_texture.GetD3DRTV(), s_clear_color.data()); - m_context->OMSetRenderTargets(1, m_post_processing_input_texture.GetD3DRTVArray(), nullptr); - RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - - const s32 orig_texture_width = texture_view_width; - const s32 orig_texture_height = texture_view_height; - texture = &m_post_processing_input_texture; - texture_view_x = final_left; - texture_view_y = final_top; - texture_view_width = final_width; 
- texture_view_height = final_height; - - const u32 final_stage = static_cast(m_post_processing_stages.size()) - 1u; - for (u32 i = 0; i < static_cast(m_post_processing_stages.size()); i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - ID3D11RenderTargetView* rtv = (i == final_stage) ? final_target : pps.output_texture.GetD3DRTV(); - m_context->ClearRenderTargetView(rtv, s_clear_color.data()); - m_context->OMSetRenderTargets(1, &rtv, nullptr); - - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->VSSetShader(pps.vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(pps.pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, texture->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_border_sampler.GetAddressOf()); - - const auto map = - m_display_uniform_buffer.Map(m_context.Get(), m_display_uniform_buffer.GetSize(), pps.uniforms_size); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - map.pointer, texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, - texture_view_height, GetWindowWidth(), GetWindowHeight(), orig_texture_width, orig_texture_height, - static_cast(m_post_processing_timer.GetTimeSeconds())); - m_display_uniform_buffer.Unmap(m_context.Get(), pps.uniforms_size); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - - m_context->Draw(3, 0); - - if (i != final_stage) - texture = &pps.output_texture; - } - - ID3D11ShaderResourceView* null_srv = nullptr; - m_context->PSSetShaderResources(0, 1, &null_srv); -} - -bool D3D11HostDisplay::CreateTimestampQueries() -{ - for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++) - { - for (u32 j = 0; j < 3; j++) - { - const CD3D11_QUERY_DESC qdesc((j == 0) ? 
D3D11_QUERY_TIMESTAMP_DISJOINT : D3D11_QUERY_TIMESTAMP); - const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - m_timestamp_queries = {}; - return false; - } - } - } - - KickTimestampQuery(); - return true; -} - -void D3D11HostDisplay::DestroyTimestampQueries() -{ - if (!m_timestamp_queries[0][0]) - return; - - if (m_timestamp_query_started) - m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); - - m_timestamp_queries = {}; - m_read_timestamp_query = 0; - m_write_timestamp_query = 0; - m_waiting_timestamp_queries = 0; - m_timestamp_query_started = 0; -} - -void D3D11HostDisplay::PopTimestampQuery() -{ - while (m_waiting_timestamp_queries > 0) - { - D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; - const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint, - sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH); - if (disjoint_hr != S_OK) - break; - - if (disjoint.Disjoint) - { - Log_VerbosePrintf("GPU timing disjoint, resetting."); - m_read_timestamp_query = 0; - m_write_timestamp_query = 0; - m_waiting_timestamp_queries = 0; - m_timestamp_query_started = 0; - } - else - { - u64 start = 0, end = 0; - const HRESULT start_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start, - sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH); - const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end), - D3D11_ASYNC_GETDATA_DONOTFLUSH); - if (start_hr == S_OK && end_hr == S_OK) - { - const float delta = - static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); - m_accumulated_gpu_time += delta; - m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; - m_waiting_timestamp_queries--; - } - } - } - - if (m_timestamp_query_started) - { - m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get()); - 
m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get()); - m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; - m_timestamp_query_started = false; - m_waiting_timestamp_queries++; - } -} - -void D3D11HostDisplay::KickTimestampQuery() -{ - if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) - return; - - m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get()); - m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); - m_timestamp_query_started = true; -} - -bool D3D11HostDisplay::SetGPUTimingEnabled(bool enabled) -{ - if (m_gpu_timing_enabled == enabled) - return true; - - m_gpu_timing_enabled = enabled; - if (m_gpu_timing_enabled) - { - if (!CreateTimestampQueries()) - return false; - - KickTimestampQuery(); - return true; - } - else - { - DestroyTimestampQueries(); - return true; - } -} - -float D3D11HostDisplay::GetAndResetAccumulatedGPUTime() -{ - const float value = m_accumulated_gpu_time; - m_accumulated_gpu_time = 0.0f; - return value; -} diff --git a/src/util/d3d11_host_display.h b/src/util/d3d11_host_display.h deleted file mode 100644 index 0aca3502c..000000000 --- a/src/util/d3d11_host_display.h +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/d3d11/stream_buffer.h" -#include "common/d3d11/texture.h" -#include "common/timer.h" -#include "common/window_info.h" -#include "common/windows_headers.h" -#include "host_display.h" -#include "postprocessing_chain.h" -#include -#include -#include -#include -#include -#include -#include - -class D3D11HostDisplay final : public HostDisplay -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - D3D11HostDisplay(); - ~D3D11HostDisplay(); - - RenderAPI GetRenderAPI() const override; - void* GetDevice() const override; - void* GetContext() 
const override; - - bool HasDevice() const override; - bool HasSurface() const override; - - bool CreateDevice(const WindowInfo& wi, bool vsync) override; - bool SetupDevice() override; - - bool MakeCurrent() override; - bool DoneCurrent() override; - - bool ChangeWindow(const WindowInfo& new_wi) override; - void ResizeWindow(s32 new_window_width, s32 new_window_height) override; - bool SupportsFullscreen() const override; - bool IsFullscreen() override; - bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; - AdapterAndModeList GetAdapterAndModeList() override; - void DestroySurface() override; - - bool SetPostProcessingChain(const std::string_view& config) override; - - std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, u32 data_stride, - bool dynamic = false) override; - bool BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) override; - void EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) override; - bool UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; - bool SupportsTextureFormat(GPUTexture::Format format) const override; - - bool GetHostRefreshRate(float* refresh_rate) override; - - bool SetGPUTimingEnabled(bool enabled) override; - float GetAndResetAccumulatedGPUTime() override; - - void SetVSync(bool enabled) override; - - bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; - - static AdapterAndModeList StaticGetAdapterAndModeList(); - -protected: - static constexpr u32 DISPLAY_UNIFORM_BUFFER_SIZE = 16; - static 
constexpr u8 NUM_TIMESTAMP_QUERIES = 3; - - static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory); - - bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); - void DestroyStagingBuffer(); - - bool CreateResources() override; - void DestroyResources() override; - - bool CreateImGuiContext() override; - void DestroyImGuiContext() override; - bool UpdateImGuiFontTexture() override; - - bool CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode); - bool CreateSwapChainRTV(); - - void RenderDisplay(); - void RenderSoftwareCursor(); - void RenderImGui(); - - void RenderDisplay(s32 left, s32 top, s32 width, s32 height, D3D11::Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter); - void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture_handle); - - struct PostProcessingStage - { - ComPtr vertex_shader; - ComPtr pixel_shader; - D3D11::Texture output_texture; - u32 uniforms_size; - }; - - bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height); - void ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, D3D11::Texture* texture, s32 texture_view_x, s32 texture_view_y, - s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height); - - bool CreateTimestampQueries(); - void DestroyTimestampQueries(); - void PopTimestampQuery(); - void KickTimestampQuery(); - - ComPtr m_device; - ComPtr m_context; - - ComPtr m_dxgi_factory; - ComPtr m_swap_chain; - ComPtr m_swap_chain_rtv; - - ComPtr m_display_rasterizer_state; - ComPtr m_display_depth_stencil_state; - ComPtr m_display_blend_state; - ComPtr m_software_cursor_blend_state; - ComPtr m_display_vertex_shader; - ComPtr m_display_pixel_shader; - ComPtr m_display_alpha_pixel_shader; - ComPtr m_point_sampler; - ComPtr m_linear_sampler; - ComPtr m_border_sampler; - - 
D3D11::StreamBuffer m_display_uniform_buffer; - ComPtr m_readback_staging_texture; - DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; - u32 m_readback_staging_texture_width = 0; - u32 m_readback_staging_texture_height = 0; - - bool m_allow_tearing_supported = false; - bool m_using_flip_model_swap_chain = true; - bool m_using_allow_tearing = false; - - FrontendCommon::PostProcessingChain m_post_processing_chain; - D3D11::Texture m_post_processing_input_texture; - std::vector m_post_processing_stages; - Common::Timer m_post_processing_timer; - - std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; - u8 m_read_timestamp_query = 0; - u8 m_write_timestamp_query = 0; - u8 m_waiting_timestamp_queries = 0; - bool m_timestamp_query_started = false; - float m_accumulated_gpu_time = 0.0f; -}; diff --git a/src/util/d3d11_pipeline.cpp b/src/util/d3d11_pipeline.cpp new file mode 100644 index 000000000..c5721699f --- /dev/null +++ b/src/util/d3d11_pipeline.cpp @@ -0,0 +1,395 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d11_pipeline.h" +#include "d3d11_device.h" +#include "d3d_common.h" + +#include "common/log.h" + +#include "fmt/format.h" + +#include +#include + +Log_SetChannel(D3D11Device); + +D3D11Shader::D3D11Shader(GPUShaderStage stage, Microsoft::WRL::ComPtr shader, + std::vector bytecode) + : GPUShader(stage), m_shader(std::move(shader)), m_bytecode(std::move(bytecode)) +{ +} + +D3D11Shader::~D3D11Shader() = default; + +ID3D11VertexShader* D3D11Shader::GetVertexShader() const +{ + DebugAssert(m_stage == GPUShaderStage::Vertex); + return static_cast(m_shader.Get()); +} + +ID3D11PixelShader* D3D11Shader::GetPixelShader() const +{ + DebugAssert(m_stage == GPUShaderStage::Fragment); + return static_cast(m_shader.Get()); +} + +ID3D11ComputeShader* D3D11Shader::GetComputeShader() const +{ + DebugAssert(m_stage == GPUShaderStage::Compute); + return 
static_cast(m_shader.Get()); +} + +void D3D11Shader::SetDebugName(const std::string_view& name) +{ + SetD3DDebugObjectName(m_shader.Get(), name); +} + +std::unique_ptr D3D11Device::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) +{ + ComPtr shader; + std::vector bytecode; + HRESULT hr; + switch (stage) + { + case GPUShaderStage::Vertex: + hr = m_device->CreateVertexShader(data.data(), data.size(), nullptr, + reinterpret_cast(shader.GetAddressOf())); + bytecode.resize(data.size()); + std::memcpy(bytecode.data(), data.data(), data.size()); + break; + + case GPUShaderStage::Fragment: + hr = m_device->CreatePixelShader(data.data(), data.size(), nullptr, + reinterpret_cast(shader.GetAddressOf())); + break; + + case GPUShaderStage::Compute: + hr = m_device->CreateComputeShader(data.data(), data.size(), nullptr, + reinterpret_cast(shader.GetAddressOf())); + break; + + default: + UnreachableCode(); + break; + } + + if (!shader) + return {}; + + return std::unique_ptr(new D3D11Shader(stage, std::move(shader), std::move(bytecode))); +} + +std::unique_ptr D3D11Device::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, + DynamicHeapArray* out_binary) +{ + std::optional> bytecode = + D3DCommon::CompileShader(m_device->GetFeatureLevel(), m_debug_device, stage, source, entry_point); + if (!bytecode.has_value()) + return {}; + + std::unique_ptr ret = CreateShaderFromBinary(stage, bytecode.value()); + if (ret && out_binary) + *out_binary = std::move(bytecode.value()); + + return ret; +} + +D3D11Pipeline::D3D11Pipeline(ComPtr rs, ComPtr ds, + ComPtr bs, ComPtr il, ComPtr vs, + ComPtr ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, + u32 blend_factor) + : m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)), + m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride), m_blend_factor(blend_factor), + 
m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor)) +{ +} + +D3D11Pipeline::~D3D11Pipeline() +{ + D3D11Device::GetInstance().UnbindPipeline(this); +} + +void D3D11Pipeline::SetDebugName(const std::string_view& name) +{ + // can't label this directly +} + +D3D11Device::ComPtr D3D11Device::GetRasterizationState(const GPUPipeline::RasterizationState& rs) +{ + ComPtr drs; + + const auto it = m_rasterization_states.find(rs.key); + if (it != m_rasterization_states.end()) + { + drs = it->second; + return drs; + } + + static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ + D3D11_CULL_NONE, // None + D3D11_CULL_FRONT, // Front + D3D11_CULL_BACK, // Back + }}; + + D3D11_RASTERIZER_DESC desc = {}; + desc.FillMode = D3D11_FILL_SOLID; + desc.CullMode = cull_mapping[static_cast(rs.cull_mode.GetValue())]; + desc.ScissorEnable = TRUE; + // desc.MultisampleEnable ??? + + HRESULT hr = m_device->CreateRasterizerState(&desc, drs.GetAddressOf()); + if (FAILED(hr)) + Log_ErrorPrintf("Failed to create depth state with %08X", hr); + + m_rasterization_states.emplace(rs.key, drs); + return drs; +} + +D3D11Device::ComPtr D3D11Device::GetDepthState(const GPUPipeline::DepthState& ds) +{ + ComPtr dds; + + const auto it = m_depth_states.find(ds.key); + if (it != m_depth_states.end()) + { + dds = it->second; + return dds; + } + + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = + {{ + D3D11_COMPARISON_NEVER, // Never + D3D11_COMPARISON_ALWAYS, // Always + D3D11_COMPARISON_LESS, // Less + D3D11_COMPARISON_LESS_EQUAL, // LessEqual + D3D11_COMPARISON_GREATER, // Greater + D3D11_COMPARISON_GREATER_EQUAL, // GreaterEqual + D3D11_COMPARISON_EQUAL, // Equal + }}; + + D3D11_DEPTH_STENCIL_DESC desc = {}; + desc.DepthEnable = ds.depth_test != GPUPipeline::DepthFunc::Always || ds.depth_write; + desc.DepthFunc = func_mapping[static_cast(ds.depth_test.GetValue())]; + desc.DepthWriteMask = ds.depth_write ? 
D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO; + + HRESULT hr = m_device->CreateDepthStencilState(&desc, dds.GetAddressOf()); + if (FAILED(hr)) + Log_ErrorPrintf("Failed to create depth state with %08X", hr); + + m_depth_states.emplace(ds.key, dds); + return dds; +} + +D3D11Device::ComPtr D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs) +{ + ComPtr dbs; + + const auto it = m_blend_states.find(bs.key); + if (it != m_blend_states.end()) + { + dbs = it->second; + return dbs; + } + + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + D3D11_BLEND_ZERO, // Zero + D3D11_BLEND_ONE, // One + D3D11_BLEND_SRC_COLOR, // SrcColor + D3D11_BLEND_INV_SRC_COLOR, // InvSrcColor + D3D11_BLEND_DEST_COLOR, // DstColor + D3D11_BLEND_INV_DEST_COLOR, // InvDstColor + D3D11_BLEND_SRC_ALPHA, // SrcAlpha + D3D11_BLEND_INV_SRC_ALPHA, // InvSrcAlpha + D3D11_BLEND_SRC1_ALPHA, // SrcAlpha1 + D3D11_BLEND_INV_SRC1_ALPHA, // InvSrcAlpha1 + D3D11_BLEND_DEST_ALPHA, // DstAlpha + D3D11_BLEND_INV_DEST_ALPHA, // InvDstAlpha + D3D11_BLEND_BLEND_FACTOR, // ConstantColor + D3D11_BLEND_INV_BLEND_FACTOR, // InvConstantColor + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + D3D11_BLEND_OP_ADD, // Add + D3D11_BLEND_OP_SUBTRACT, // Subtract + D3D11_BLEND_OP_REV_SUBTRACT, // ReverseSubtract + D3D11_BLEND_OP_MIN, // Min + D3D11_BLEND_OP_MAX, // Max + }}; + + D3D11_BLEND_DESC blend_desc = {}; + D3D11_RENDER_TARGET_BLEND_DESC& tgt_desc = blend_desc.RenderTarget[0]; + tgt_desc.BlendEnable = bs.enable; + tgt_desc.RenderTargetWriteMask = bs.write_mask; + if (bs.enable) + { + tgt_desc.SrcBlend = blend_mapping[static_cast(bs.src_blend.GetValue())]; + tgt_desc.DestBlend = blend_mapping[static_cast(bs.dst_blend.GetValue())]; + tgt_desc.BlendOp = op_mapping[static_cast(bs.blend_op.GetValue())]; + tgt_desc.SrcBlendAlpha = blend_mapping[static_cast(bs.src_alpha_blend.GetValue())]; + tgt_desc.DestBlendAlpha = 
blend_mapping[static_cast(bs.dst_alpha_blend.GetValue())]; + tgt_desc.BlendOpAlpha = op_mapping[static_cast(bs.alpha_blend_op.GetValue())]; + } + + HRESULT hr = m_device->CreateBlendState(&blend_desc, dbs.GetAddressOf()); + if (FAILED(hr)) + Log_ErrorPrintf("Failed to create blend state with %08X", hr); + + m_blend_states.emplace(bs.key, dbs); + return dbs; +} + +D3D11Device::ComPtr D3D11Device::GetInputLayout(const GPUPipeline::InputLayout& il, + const D3D11Shader* vs) +{ + ComPtr dil; + const auto it = m_input_layouts.find(il); + if (it != m_input_layouts.end()) + { + dil = it->second; + return dil; + } + + static constexpr u32 MAX_COMPONENTS = 4; + static constexpr const DXGI_FORMAT + format_mapping[static_cast(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT}, // Float + {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UINT}, // UInt8 + {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_SINT}, // SInt8 + {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM}, // UNorm8 + {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R16G16B16A16_UINT}, // UInt16 + {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R16G16B16A16_SINT}, // SInt16 + {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R16G16B16A16_UNORM}, // UNorm16 + {DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32A32_UINT}, // UInt32 + {DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32A32_SINT}, // SInt32 + }; + + D3D11_INPUT_ELEMENT_DESC* elems = + static_cast(alloca(sizeof(D3D11_INPUT_ELEMENT_DESC) * il.vertex_attributes.size())); + for (size_t i = 0; i < il.vertex_attributes.size(); i++) + { + 
const GPUPipeline::VertexAttribute& va = il.vertex_attributes[i]; + Assert(va.components > 0 && va.components <= MAX_COMPONENTS); + + D3D11_INPUT_ELEMENT_DESC& elem = elems[i]; + elem.SemanticName = "ATTR"; + elem.SemanticIndex = va.index; + elem.Format = format_mapping[static_cast(va.type.GetValue())][va.components - 1]; + elem.InputSlot = 0; + elem.AlignedByteOffset = va.offset; + elem.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + elem.InstanceDataStepRate = 0; + } + + HRESULT hr = m_device->CreateInputLayout(elems, static_cast(il.vertex_attributes.size()), + vs->GetBytecode().data(), vs->GetBytecode().size(), dil.GetAddressOf()); + if (FAILED(hr)) + Log_ErrorPrintf("Failed to create input layout with %08X", hr); + + m_input_layouts.emplace(il, dil); + return dil; +} + +std::unique_ptr D3D11Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config) +{ + ComPtr rs = GetRasterizationState(config.rasterization); + ComPtr ds = GetDepthState(config.depth); + ComPtr bs = GetBlendState(config.blend); + if (!rs || !ds || !bs) + return {}; + + ComPtr il; + u32 vertex_stride = 0; + if (!config.input_layout.vertex_attributes.empty()) + { + il = GetInputLayout(config.input_layout, static_cast(config.vertex_shader)); + vertex_stride = config.input_layout.vertex_stride; + if (!il) + return {}; + } + + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = + {{ + D3D11_PRIMITIVE_TOPOLOGY_POINTLIST, // Points + D3D11_PRIMITIVE_TOPOLOGY_LINELIST, // Lines + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, // Triangles + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, // TriangleStrips + }}; + + return std::unique_ptr( + new D3D11Pipeline(std::move(rs), std::move(ds), std::move(bs), std::move(il), + static_cast(config.vertex_shader)->GetVertexShader(), + static_cast(config.fragment_shader)->GetPixelShader(), + primitives[static_cast(config.primitive)], vertex_stride, config.blend.constant)); +} + +void D3D11Device::SetPipeline(GPUPipeline* pipeline) +{ + if 
(m_current_pipeline == pipeline) + return; + + D3D11Pipeline* const PL = static_cast(pipeline); + m_current_pipeline = PL; + + if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il) + { + m_current_input_layout = il; + m_context->IASetInputLayout(il); + } + + if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride) + { + const UINT offset = 0; + m_current_vertex_stride = PL->GetVertexStride(); + m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset); + } + + if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology) + { + m_current_primitive_topology = topology; + m_context->IASetPrimitiveTopology(topology); + } + + if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs) + { + m_current_vertex_shader = vs; + m_context->VSSetShader(vs, nullptr, 0); + } + + if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps) + { + m_current_pixel_shader = ps; + m_context->PSSetShader(ps, nullptr, 0); + } + + if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs) + { + m_current_rasterizer_state = rs; + m_context->RSSetState(rs); + } + + if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds) + { + m_current_depth_state = ds; + m_context->OMSetDepthStencilState(ds, 0); + } + + if (ID3D11BlendState* bs = PL->GetBlendState(); + m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor()) + { + m_current_blend_state = bs; + m_current_blend_factor = PL->GetBlendFactor(); + m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu); + } +} + +void D3D11Device::UnbindPipeline(D3D11Pipeline* pl) +{ + if (m_current_pipeline != pl) + return; + + // Let the runtime deal with the dead objects... 
+ m_current_pipeline = nullptr; +} diff --git a/src/util/d3d11_pipeline.h b/src/util/d3d11_pipeline.h new file mode 100644 index 000000000..9c22c1fe5 --- /dev/null +++ b/src/util/d3d11_pipeline.h @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" + +#include "common/windows_headers.h" + +#include +#include +#include +#include +#include +#include + +class D3D11Device; + +class D3D11Shader final : public GPUShader +{ + friend D3D11Device; + +public: + ~D3D11Shader() override; + + ID3D11VertexShader* GetVertexShader() const; + ID3D11PixelShader* GetPixelShader() const; + ID3D11ComputeShader* GetComputeShader() const; + + ALWAYS_INLINE const std::vector& GetBytecode() const { return m_bytecode; } + + void SetDebugName(const std::string_view& name) override; + +private: + D3D11Shader(GPUShaderStage stage, Microsoft::WRL::ComPtr shader, std::vector bytecode); + + Microsoft::WRL::ComPtr m_shader; + std::vector m_bytecode; // only for VS +}; + +class D3D11Pipeline final : public GPUPipeline +{ + friend D3D11Device; + + template + using ComPtr = Microsoft::WRL::ComPtr; + +public: + ~D3D11Pipeline() override; + + void SetDebugName(const std::string_view& name) override; + + ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); } + ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); } + ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); } + ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); } + ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); } + ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); } + ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; } + ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; } + ALWAYS_INLINE u32 
GetBlendFactor() const { return m_blend_factor; } + ALWAYS_INLINE const std::array& GetBlendFactorFloat() const { return m_blend_factor_float; } + +private: + D3D11Pipeline(ComPtr rs, ComPtr ds, ComPtr bs, + ComPtr il, ComPtr vs, ComPtr ps, + D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor); + + ComPtr m_rs; + ComPtr m_ds; + ComPtr m_bs; + ComPtr m_il; + ComPtr m_vs; + ComPtr m_ps; + D3D11_PRIMITIVE_TOPOLOGY m_topology; + u32 m_vertex_stride; + u32 m_blend_factor; + std::array m_blend_factor_float; +}; diff --git a/src/common/d3d11/stream_buffer.cpp b/src/util/d3d11_stream_buffer.cpp similarity index 72% rename from src/common/d3d11/stream_buffer.cpp rename to src/util/d3d11_stream_buffer.cpp index ad39eb95f..d424510a3 100644 --- a/src/common/d3d11/stream_buffer.cpp +++ b/src/util/d3d11_stream_buffer.cpp @@ -1,29 +1,31 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -#include "stream_buffer.h" -#include "../align.h" -#include "../assert.h" -#include "../log.h" -Log_SetChannel(D3D11); +#include "d3d11_stream_buffer.h" -namespace D3D11 { +#include "common/align.h" +#include "common/assert.h" +#include "common/log.h" -StreamBuffer::StreamBuffer() : m_size(0), m_position(0) {} +Log_SetChannel(D3D11Device); -StreamBuffer::StreamBuffer(ComPtr buffer) : m_buffer(std::move(buffer)), m_position(0) +D3D11StreamBuffer::D3D11StreamBuffer() : m_size(0), m_position(0) +{ +} + +D3D11StreamBuffer::D3D11StreamBuffer(ComPtr buffer) : m_buffer(std::move(buffer)), m_position(0) { D3D11_BUFFER_DESC desc; m_buffer->GetDesc(&desc); m_size = desc.ByteWidth; } -StreamBuffer::~StreamBuffer() +D3D11StreamBuffer::~D3D11StreamBuffer() { - Release(); + Destroy(); } -bool StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size) +bool D3D11StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size) { 
CD3D11_BUFFER_DESC desc(size, bind_flags, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE, 0, 0); ComPtr buffer; @@ -65,22 +67,12 @@ bool StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 return true; } -void StreamBuffer::Adopt(ComPtr buffer) -{ - m_buffer = std::move(buffer); - - D3D11_BUFFER_DESC desc; - m_buffer->GetDesc(&desc); - m_size = desc.ByteWidth; - m_position = 0; -} - -void StreamBuffer::Release() +void D3D11StreamBuffer::Destroy() { m_buffer.Reset(); } -StreamBuffer::MappingResult StreamBuffer::Map(ID3D11DeviceContext* context, u32 alignment, u32 min_size) +D3D11StreamBuffer::MappingResult D3D11StreamBuffer::Map(ID3D11DeviceContext1* context, u32 alignment, u32 min_size) { m_position = Common::AlignUp(m_position, alignment); if ((m_position + min_size) >= m_size || !m_use_map_no_overwrite) @@ -104,10 +96,8 @@ StreamBuffer::MappingResult StreamBuffer::Map(ID3D11DeviceContext* context, u32 (m_size - m_position) / alignment}; } -void StreamBuffer::Unmap(ID3D11DeviceContext* context, u32 used_size) +void D3D11StreamBuffer::Unmap(ID3D11DeviceContext1* context, u32 used_size) { context->Unmap(m_buffer.Get(), 0); m_position += used_size; } - -} // namespace D3D11 \ No newline at end of file diff --git a/src/common/d3d11/stream_buffer.h b/src/util/d3d11_stream_buffer.h similarity index 64% rename from src/common/d3d11/stream_buffer.h rename to src/util/d3d11_stream_buffer.h index aac81b93b..92dc68158 100644 --- a/src/common/d3d11/stream_buffer.h +++ b/src/util/d3d11_stream_buffer.h @@ -1,22 +1,23 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "../types.h" -#include "../windows_headers.h" -#include + +#include "common/types.h" +#include "common/windows_headers.h" + +#include #include -namespace D3D11 { -class StreamBuffer +class D3D11StreamBuffer { public: template using ComPtr = 
Microsoft::WRL::ComPtr; - StreamBuffer(); - StreamBuffer(ComPtr buffer); - ~StreamBuffer(); + D3D11StreamBuffer(); + D3D11StreamBuffer(ComPtr buffer); + ~D3D11StreamBuffer(); ALWAYS_INLINE ID3D11Buffer* GetD3DBuffer() const { return m_buffer.Get(); } ALWAYS_INLINE ID3D11Buffer* const* GetD3DBufferArray() const { return m_buffer.GetAddressOf(); } @@ -24,9 +25,8 @@ public: ALWAYS_INLINE u32 GetPosition() const { return m_position; } bool Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size); - void Adopt(ComPtr buffer); - void Release(); - + void Destroy(); + struct MappingResult { void* pointer; @@ -35,8 +35,8 @@ public: u32 space_aligned; // remaining space / alignment }; - MappingResult Map(ID3D11DeviceContext* context, u32 alignment, u32 min_size); - void Unmap(ID3D11DeviceContext* context, u32 used_size); + MappingResult Map(ID3D11DeviceContext1* context, u32 alignment, u32 min_size); + void Unmap(ID3D11DeviceContext1* context, u32 used_size); private: ComPtr m_buffer; @@ -44,4 +44,3 @@ private: u32 m_position; bool m_use_map_no_overwrite = false; }; -} // namespace GL \ No newline at end of file diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp new file mode 100644 index 000000000..a5399da8a --- /dev/null +++ b/src/util/d3d11_texture.cpp @@ -0,0 +1,509 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d11_texture.h" +#include "d3d11_device.h" +#include "d3d_common.h" + +// #include "common/align.h" +// #include "common/assert.h" +// #include "common/file_system.h" +#include "common/log.h" +// #include "common/path.h" +// #include "common/rectangle.h" +#include "common/string_util.h" + +#include "fmt/format.h" + +#include + +Log_SetChannel(D3D11Device); + +std::unique_ptr D3D11Device::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data, u32 data_stride, bool 
dynamic /* = false */) +{ + std::unique_ptr tex = std::make_unique(); + if (!tex->Create(m_device.Get(), width, height, layers, levels, samples, type, format, data, data_stride, dynamic)) + tex.reset(); + + return tex; +} + +bool D3D11Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) +{ + const D3D11Texture* tex = static_cast(texture); + if (!CheckStagingBufferSize(width, height, tex->GetDXGIFormat())) + return false; + + const CD3D11_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), + static_cast(y + height), 1); + m_context->CopySubresourceRegion(m_readback_staging_texture.Get(), 0, 0, 0, 0, tex->GetD3DTexture(), 0, &box); + + D3D11_MAPPED_SUBRESOURCE sr; + HRESULT hr = m_context->Map(m_readback_staging_texture.Get(), 0, D3D11_MAP_READ, 0, &sr); + if (FAILED(hr)) + { + Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); + return false; + } + + const u32 copy_size = tex->GetPixelSize() * width; + StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height); + m_context->Unmap(m_readback_staging_texture.Get(), 0); + return true; +} + +bool D3D11Device::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format) +{ + if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height && + m_readback_staging_texture_format == format) + return true; + + DestroyStagingBuffer(); + + CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ); + HRESULT hr = m_device->CreateTexture2D(&desc, nullptr, m_readback_staging_texture.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateTexture2D() failed with HRESULT %08X", hr); + return false; + } + + return true; +} + +void D3D11Device::DestroyStagingBuffer() +{ + m_readback_staging_texture.Reset(); + m_readback_staging_texture_width = 0; + m_readback_staging_texture_height = 0; + 
m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; +} + +bool D3D11Device::SupportsTextureFormat(GPUTexture::Format format) const +{ + const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format; + if (dfmt == DXGI_FORMAT_UNKNOWN) + return false; + + UINT support = 0; + const UINT required = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; + return (SUCCEEDED(m_device->CheckFormatSupport(dfmt, &support)) && ((support & required) == required)); +} + +D3D11Framebuffer::D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, + ComPtr rtv, ComPtr dsv) + : GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv)) +{ +} + +D3D11Framebuffer::~D3D11Framebuffer() +{ + D3D11Device::GetInstance().UnbindFramebuffer(this); +} + +void D3D11Framebuffer::SetDebugName(const std::string_view& name) +{ + if (m_rtv) + SetD3DDebugObjectName(m_rtv.Get(), fmt::format("{} RTV", name)); + if (m_dsv) + SetD3DDebugObjectName(m_dsv.Get(), fmt::format("{} DSV", name)); +} + +void D3D11Framebuffer::CommitClear(ID3D11DeviceContext1* context) +{ + if (UNLIKELY(m_rt && m_rt->GetState() != GPUTexture::State::Dirty)) + { + if (m_rt->GetState() == GPUTexture::State::Invalidated) + context->DiscardView(m_rtv.Get()); + else + context->ClearRenderTargetView(m_rtv.Get(), m_rt->GetUNormClearColor().data()); + + m_rt->SetState(GPUTexture::State::Dirty); + } + + if (UNLIKELY(m_ds && m_ds->GetState() != GPUTexture::State::Dirty)) + { + if (m_ds->GetState() == GPUTexture::State::Invalidated) + context->DiscardView(m_dsv.Get()); + else + context->ClearDepthStencilView(m_dsv.Get(), D3D11_CLEAR_DEPTH, m_ds->GetClearDepth(), 0); + + m_ds->SetState(GPUTexture::State::Dirty); + } +} + +std::unique_ptr D3D11Device::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds) +{ + DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); + D3D11Texture* RT = static_cast((rt_or_ds && 
rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); + D3D11Texture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); + + ComPtr rtv; + if (RT) + { + rtv = RT->GetD3DRTV(); + Assert(rtv); + } + + ComPtr dsv; + if (DS) + { + dsv = DS->GetD3DDSV(); + Assert(dsv); + } + + return std::unique_ptr(new D3D11Framebuffer(RT, DS, RT ? RT->GetWidth() : DS->GetWidth(), + RT ? RT->GetHeight() : DS->GetHeight(), std::move(rtv), + std::move(dsv))); +} + +D3D11Sampler::D3D11Sampler(ComPtr ss) : m_ss(std::move(ss)) +{ +} + +D3D11Sampler::~D3D11Sampler() = default; + +void D3D11Sampler::SetDebugName(const std::string_view& name) +{ + SetD3DDebugObjectName(m_ss.Get(), name); +} + +std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& config) +{ + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + D3D11_TEXTURE_ADDRESS_WRAP, // Repeat + D3D11_TEXTURE_ADDRESS_CLAMP, // ClampToEdge + D3D11_TEXTURE_ADDRESS_BORDER, // ClampToBorder + }}; + static constexpr u8 filter_count = static_cast(GPUSampler::Filter::MaxCount); + static constexpr D3D11_FILTER filters[filter_count][filter_count][filter_count] = { + { + {D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT}, + {D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT, D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT}, + }, + { + {D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR, D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR}, + {D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, D3D11_FILTER_MIN_MAG_MIP_LINEAR}, + }}; + + D3D11_SAMPLER_DESC desc = {}; + desc.AddressU = ta[static_cast(config.address_u.GetValue())]; + desc.AddressV = ta[static_cast(config.address_v.GetValue())]; + desc.AddressW = ta[static_cast(config.address_w.GetValue())]; + std::memcpy(desc.BorderColor, RGBA8ToFloat(config.border_color).data(), sizeof(desc.BorderColor)); + desc.MinLOD = static_cast(config.min_lod); + desc.MaxLOD = static_cast(config.max_lod); + + if (config.anisotropy > 0) + { + desc.Filter = D3D11_FILTER_ANISOTROPIC; + 
desc.MaxAnisotropy = config.anisotropy; + } + else + { + desc.Filter = filters[static_cast(config.mip_filter.GetValue())][static_cast(config.min_filter.GetValue())] + [static_cast(config.mag_filter.GetValue())]; + desc.MaxAnisotropy = 1; + } + + ComPtr ss; + const HRESULT hr = m_device->CreateSamplerState(&desc, ss.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateSamplerState() failed: %08X", hr); + return {}; + } + + return std::unique_ptr(new D3D11Sampler(std::move(ss))); +} + +D3D11Texture::D3D11Texture() = default; + +D3D11Texture::~D3D11Texture() +{ + Destroy(); +} + +D3D11_TEXTURE2D_DESC D3D11Texture::GetDesc() const +{ + D3D11_TEXTURE2D_DESC desc; + m_texture->GetDesc(&desc); + return desc; +} + +void D3D11Texture::CommitClear(ID3D11DeviceContext1* context) +{ + if (m_state == GPUTexture::State::Dirty) + return; + + if (IsDepthStencil()) + { + if (m_state == GPUTexture::State::Invalidated) + context->DiscardView(GetD3DDSV()); + else + context->ClearDepthStencilView(GetD3DDSV(), D3D11_CLEAR_DEPTH, GetClearDepth(), 0); + } + else if (IsRenderTarget()) + { + if (m_state == GPUTexture::State::Invalidated) + context->DiscardView(GetD3DRTV()); + else + context->ClearRenderTargetView(GetD3DRTV(), GetUNormClearColor().data()); + } + + m_state = GPUTexture::State::Dirty; +} + +bool D3D11Texture::IsValid() const +{ + return static_cast(m_texture); +} + +bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, + u32 level /*= 0*/) +{ + if (m_dynamic) + { + void* map; + u32 map_stride; + if (!Map(&map, &map_stride, x, y, width, height, layer, level)) + return false; + + StringUtil::StrideMemCpy(map, map_stride, data, pitch, GetPixelSize() * width, height); + Unmap(); + return true; + } + + const CD3D11_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), + static_cast(y + height), 1); + const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); + + ID3D11DeviceContext1* context = 
D3D11Device::GetD3DContext(); + CommitClear(context); + context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); + m_state = GPUTexture::State::Dirty; + return true; +} + +bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, + u32 level /*= 0*/) +{ + if (!m_dynamic || (x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || + level > m_levels) + { + return false; + } + + const bool discard = (width == m_width && height == m_height); + const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); + + ID3D11DeviceContext1* context = D3D11Device::GetD3DContext(); + CommitClear(context); + + D3D11_MAPPED_SUBRESOURCE sr; + HRESULT hr = context->Map(m_texture.Get(), srnum, discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_READ_WRITE, 0, &sr); + if (FAILED(hr)) + { + Log_ErrorPrintf("Map pixels texture failed: %08X", hr); + return false; + } + + *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); + *map_stride = sr.RowPitch; + m_mapped_subresource = srnum; + m_state = GPUTexture::State::Dirty; + return true; +} + +void D3D11Texture::Unmap() +{ + D3D11Device::GetD3DContext()->Unmap(m_texture.Get(), m_mapped_subresource); + m_mapped_subresource = 0; +} + +void D3D11Texture::SetDebugName(const std::string_view& name) +{ + SetD3DDebugObjectName(m_texture.Get(), name); +} + +DXGI_FORMAT D3D11Texture::GetDXGIFormat() const +{ + return D3DCommon::GetFormatMapping(m_format).resource_format; +} + +bool D3D11Texture::Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, + Format format, const void* initial_data /* = nullptr */, u32 initial_data_stride /* = 0 */, + bool dynamic /* = false */) +{ + if (!ValidateConfig(width, height, layers, layers, samples, type, format)) + return false; + + u32 bind_flags = 0; + switch (type) + { + case Type::RenderTarget: + bind_flags = D3D11_BIND_RENDER_TARGET | 
D3D11_BIND_SHADER_RESOURCE; + break; + case Type::DepthStencil: + bind_flags = D3D11_BIND_DEPTH_STENCIL; // | D3D11_BIND_SHADER_RESOURCE; + break; + case Type::Texture: + bind_flags = D3D11_BIND_SHADER_RESOURCE; + break; + case Type::RWTexture: + bind_flags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + break; + default: + break; + } + + const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(format); + + CD3D11_TEXTURE2D_DESC desc(fm.resource_format, width, height, layers, levels, bind_flags, + dynamic ? D3D11_USAGE_DYNAMIC : D3D11_USAGE_DEFAULT, dynamic ? D3D11_CPU_ACCESS_WRITE : 0, + samples, 0, 0); + + D3D11_SUBRESOURCE_DATA srd; + srd.pSysMem = initial_data; + srd.SysMemPitch = initial_data_stride; + srd.SysMemSlicePitch = initial_data_stride * height; + + ComPtr texture; + const HRESULT tex_hr = device->CreateTexture2D(&desc, initial_data ? &srd : nullptr, texture.GetAddressOf()); + if (FAILED(tex_hr)) + { + Log_ErrorPrintf( + "Create texture failed: 0x%08X (%ux%u levels:%u samples:%u format:%u bind_flags:%X initial_data:%p)", tex_hr, + width, height, levels, samples, static_cast(format), bind_flags, initial_data); + return false; + } + + ComPtr srv; + if (bind_flags & D3D11_BIND_SHADER_RESOURCE) + { + const D3D11_SRV_DIMENSION srv_dimension = + (desc.SampleDesc.Count > 1) ? + D3D11_SRV_DIMENSION_TEXTURE2DMS : + (desc.ArraySize > 1 ? D3D11_SRV_DIMENSION_TEXTURE2DARRAY : D3D11_SRV_DIMENSION_TEXTURE2D); + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, fm.srv_format, 0, desc.MipLevels, 0, desc.ArraySize); + const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create SRV for texture failed: 0x%08X", hr); + return false; + } + } + + ComPtr rtv_dsv; + if (bind_flags & D3D11_BIND_RENDER_TARGET) + { + const D3D11_RTV_DIMENSION rtv_dimension = + (desc.SampleDesc.Count > 1) ? 
D3D11_RTV_DIMENSION_TEXTURE2DMS : D3D11_RTV_DIMENSION_TEXTURE2D; + const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, fm.rtv_format, 0, 0, desc.ArraySize); + ComPtr rtv; + const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create RTV for texture failed: 0x%08X", hr); + return false; + } + + rtv_dsv = std::move(rtv); + } + else if (bind_flags & D3D11_BIND_DEPTH_STENCIL) + { + const D3D11_DSV_DIMENSION dsv_dimension = + (desc.SampleDesc.Count > 1) ? D3D11_DSV_DIMENSION_TEXTURE2DMS : D3D11_DSV_DIMENSION_TEXTURE2D; + const CD3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc(dsv_dimension, fm.dsv_format, 0, 0, desc.ArraySize); + ComPtr dsv; + const HRESULT hr = device->CreateDepthStencilView(texture.Get(), &dsv_desc, dsv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create DSV for texture failed: 0x%08X", hr); + return false; + } + + rtv_dsv = std::move(dsv); + } + + m_texture = std::move(texture); + m_srv = std::move(srv); + m_rtv_dsv = std::move(rtv_dsv); + m_width = static_cast(width); + m_height = static_cast(height); + m_layers = static_cast(layers); + m_levels = static_cast(levels); + m_samples = static_cast(samples); + m_type = type; + m_format = format; + m_dynamic = dynamic; + return true; +} + +void D3D11Texture::Destroy() +{ + D3D11Device::GetInstance().UnbindTexture(this); + m_rtv_dsv.Reset(); + m_srv.Reset(); + m_texture.Reset(); + m_dynamic = false; + ClearBaseProperties(); +} + +D3D11TextureBuffer::D3D11TextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) +{ +} + +D3D11TextureBuffer::~D3D11TextureBuffer() = default; + +bool D3D11TextureBuffer::CreateBuffer(ID3D11Device* device) +{ + if (!m_buffer.Create(device, D3D11_BIND_SHADER_RESOURCE, GetSizeInBytes())) + return false; + + static constexpr std::array(Format::MaxCount)> dxgi_formats = {{ + DXGI_FORMAT_R16_UINT, + }}; + + CD3D11_SHADER_RESOURCE_VIEW_DESC 
srv_desc(m_buffer.GetD3DBuffer(), dxgi_formats[static_cast(m_format)], 0, + m_size_in_elements); + const HRESULT hr = device->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateShaderResourceView() failed: %08X", hr); + return false; + } + + return true; +} + +void* D3D11TextureBuffer::Map(u32 required_elements) +{ + const u32 esize = GetElementSize(m_format); + const auto res = m_buffer.Map(D3D11Device::GetD3DContext(), esize, esize * required_elements); + m_current_position = res.index_aligned; + return res.pointer; +} + +void D3D11TextureBuffer::Unmap(u32 used_elements) +{ + m_buffer.Unmap(D3D11Device::GetD3DContext(), used_elements * GetElementSize(m_format)); +} + +void D3D11TextureBuffer::SetDebugName(const std::string_view& name) +{ + SetD3DDebugObjectName(m_buffer.GetD3DBuffer(), name); +} + +std::unique_ptr D3D11Device::CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) +{ + std::unique_ptr tb = std::make_unique(format, size_in_elements); + if (!tb->CreateBuffer(m_device.Get())) + tb.reset(); + + return tb; +} diff --git a/src/util/d3d11_texture.h b/src/util/d3d11_texture.h new file mode 100644 index 000000000..a93c614f8 --- /dev/null +++ b/src/util/d3d11_texture.h @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "d3d11_stream_buffer.h" +#include "gpu_device.h" + +#include "common/windows_headers.h" + +#include +#include + +class D3D11Device; + +class D3D11Framebuffer final : public GPUFramebuffer +{ + friend D3D11Device; + + template + using ComPtr = Microsoft::WRL::ComPtr; + +public: + ~D3D11Framebuffer() override; + + ALWAYS_INLINE u32 GetNumRTVs() const { return m_rtv ? 
1 : 0; } + ALWAYS_INLINE ID3D11RenderTargetView* GetRTV() const { return m_rtv.Get(); } + ALWAYS_INLINE ID3D11RenderTargetView* const* GetRTVArray() const { return m_rtv.GetAddressOf(); } + ALWAYS_INLINE ID3D11DepthStencilView* GetDSV() const { return m_dsv.Get(); } + + void SetDebugName(const std::string_view& name) override; + void CommitClear(ID3D11DeviceContext1* context); + +private: + D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, ComPtr rtv, + ComPtr dsv); + + ComPtr m_rtv; + ComPtr m_dsv; +}; + +class D3D11Sampler final : public GPUSampler +{ + friend D3D11Device; + + template + using ComPtr = Microsoft::WRL::ComPtr; + +public: + ~D3D11Sampler() override; + + ALWAYS_INLINE ID3D11SamplerState* GetSamplerState() const { return m_ss.Get(); } + ALWAYS_INLINE ID3D11SamplerState* const* GetSamplerStateArray() const { return m_ss.GetAddressOf(); } + + void SetDebugName(const std::string_view& name) override; + +private: + D3D11Sampler(ComPtr ss); + + ComPtr m_ss; +}; + +class D3D11Texture final : public GPUTexture +{ + friend D3D11Device; + +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + D3D11Texture(); + ~D3D11Texture(); + + ALWAYS_INLINE ID3D11Texture2D* GetD3DTexture() const { return m_texture.Get(); } + ALWAYS_INLINE ID3D11ShaderResourceView* GetD3DSRV() const { return m_srv.Get(); } + ALWAYS_INLINE ID3D11View* GetRTVOrDSV() const { return m_rtv_dsv.Get(); } + ALWAYS_INLINE ID3D11RenderTargetView* GetD3DRTV() const + { + return static_cast(m_rtv_dsv.Get()); + } + ALWAYS_INLINE ID3D11DepthStencilView* GetD3DDSV() const + { + return static_cast(m_rtv_dsv.Get()); + } + ALWAYS_INLINE ID3D11ShaderResourceView* const* GetD3DSRVArray() const { return m_srv.GetAddressOf(); } + ALWAYS_INLINE ID3D11RenderTargetView* const* GetD3DRTVArray() const + { + return reinterpret_cast(m_rtv_dsv.GetAddressOf()); + } + ALWAYS_INLINE bool IsDynamic() const { return m_dynamic; } + DXGI_FORMAT GetDXGIFormat() const; + + ALWAYS_INLINE operator 
ID3D11Texture2D*() const { return m_texture.Get(); } + ALWAYS_INLINE operator ID3D11ShaderResourceView*() const { return m_srv.Get(); } + ALWAYS_INLINE operator ID3D11RenderTargetView*() const + { + return static_cast(m_rtv_dsv.Get()); + } + ALWAYS_INLINE operator ID3D11DepthStencilView*() const + { + return static_cast(m_rtv_dsv.Get()); + } + ALWAYS_INLINE operator bool() const { return static_cast(m_texture); } + + bool Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, + Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0, bool dynamic = false); + + void Destroy(); + + D3D11_TEXTURE2D_DESC GetDesc() const; + void CommitClear(ID3D11DeviceContext1* context); + + bool IsValid() const override; + + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; + bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; + void Unmap() override; + + void SetDebugName(const std::string_view& name) override; + +private: + ComPtr m_texture; + ComPtr m_srv; + ComPtr m_rtv_dsv; + u32 m_mapped_subresource = 0; + bool m_dynamic = false; +}; + +class D3D11TextureBuffer final : public GPUTextureBuffer +{ +public: + D3D11TextureBuffer(Format format, u32 size_in_elements); + ~D3D11TextureBuffer() override; + + ALWAYS_INLINE ID3D11Buffer* GetBuffer() const { return m_buffer.GetD3DBuffer(); } + ALWAYS_INLINE ID3D11ShaderResourceView* GetSRV() const { return m_srv.Get(); } + ALWAYS_INLINE ID3D11ShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); } + + bool CreateBuffer(ID3D11Device* device); + + // Inherited via GPUTextureBuffer + void* Map(u32 required_elements) override; + void Unmap(u32 used_elements) override; + + void SetDebugName(const std::string_view& name) override; + +private: + D3D11StreamBuffer m_buffer; + Microsoft::WRL::ComPtr m_srv; +}; diff --git 
a/src/util/d3d12_builders.cpp b/src/util/d3d12_builders.cpp new file mode 100644 index 000000000..94ce24ddc --- /dev/null +++ b/src/util/d3d12_builders.cpp @@ -0,0 +1,352 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d12_builders.h" +#include "d3d12_device.h" + +#include "common/assert.h" +#include "common/log.h" +#include "common/string_util.h" + +#include +#include + +Log_SetChannel(D3D12Device); + +D3D12::GraphicsPipelineBuilder::GraphicsPipelineBuilder() +{ + Clear(); +} + +void D3D12::GraphicsPipelineBuilder::Clear() +{ + std::memset(&m_desc, 0, sizeof(m_desc)); + std::memset(m_input_elements.data(), 0, sizeof(D3D12_INPUT_ELEMENT_DESC) * m_input_elements.size()); + m_desc.NodeMask = 1; + m_desc.SampleMask = 0xFFFFFFFF; + m_desc.SampleDesc.Count = 1; +} + +Microsoft::WRL::ComPtr D3D12::GraphicsPipelineBuilder::Create(ID3D12Device* device, + bool clear /*= true*/) +{ + Microsoft::WRL::ComPtr ps; + HRESULT hr = device->CreateGraphicsPipelineState(&m_desc, IID_PPV_ARGS(ps.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateGraphicsPipelineState() failed: %08X", hr); + return {}; + } + + if (clear) + Clear(); + + return ps; +} + +void D3D12::GraphicsPipelineBuilder::SetRootSignature(ID3D12RootSignature* rs) +{ + m_desc.pRootSignature = rs; +} + +void D3D12::GraphicsPipelineBuilder::SetVertexShader(const ID3DBlob* blob) +{ + SetVertexShader(const_cast(blob)->GetBufferPointer(), + static_cast(const_cast(blob)->GetBufferSize())); +} + +void D3D12::GraphicsPipelineBuilder::SetVertexShader(const void* data, u32 data_size) +{ + m_desc.VS.pShaderBytecode = data; + m_desc.VS.BytecodeLength = data_size; +} + +void D3D12::GraphicsPipelineBuilder::SetGeometryShader(const ID3DBlob* blob) +{ + SetGeometryShader(const_cast(blob)->GetBufferPointer(), + static_cast(const_cast(blob)->GetBufferSize())); +} + +void D3D12::GraphicsPipelineBuilder::SetGeometryShader(const void* data, u32 
data_size) +{ + m_desc.GS.pShaderBytecode = data; + m_desc.GS.BytecodeLength = data_size; +} + +void D3D12::GraphicsPipelineBuilder::SetPixelShader(const ID3DBlob* blob) +{ + SetPixelShader(const_cast(blob)->GetBufferPointer(), + static_cast(const_cast(blob)->GetBufferSize())); +} + +void D3D12::GraphicsPipelineBuilder::SetPixelShader(const void* data, u32 data_size) +{ + m_desc.PS.pShaderBytecode = data; + m_desc.PS.BytecodeLength = data_size; +} + +void D3D12::GraphicsPipelineBuilder::AddVertexAttribute(const char* semantic_name, u32 semantic_index, + DXGI_FORMAT format, u32 buffer, u32 offset) +{ + const u32 index = m_desc.InputLayout.NumElements; + m_input_elements[index].SemanticIndex = semantic_index; + m_input_elements[index].SemanticName = semantic_name; + m_input_elements[index].Format = format; + m_input_elements[index].AlignedByteOffset = offset; + m_input_elements[index].InputSlot = buffer; + m_input_elements[index].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + m_input_elements[index].InstanceDataStepRate = 0; + + m_desc.InputLayout.pInputElementDescs = m_input_elements.data(); + m_desc.InputLayout.NumElements++; +} + +void D3D12::GraphicsPipelineBuilder::SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE type) +{ + m_desc.PrimitiveTopologyType = type; +} + +void D3D12::GraphicsPipelineBuilder::SetRasterizationState(D3D12_FILL_MODE polygon_mode, D3D12_CULL_MODE cull_mode, + bool front_face_ccw) +{ + m_desc.RasterizerState.FillMode = polygon_mode; + m_desc.RasterizerState.CullMode = cull_mode; + m_desc.RasterizerState.FrontCounterClockwise = front_face_ccw; +} + +void D3D12::GraphicsPipelineBuilder::SetMultisamples(u32 multisamples) +{ + m_desc.RasterizerState.MultisampleEnable = multisamples > 1; + m_desc.SampleDesc.Count = multisamples; +} + +void D3D12::GraphicsPipelineBuilder::SetNoCullRasterizationState() +{ + SetRasterizationState(D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false); +} + +void 
D3D12::GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op) +{ + m_desc.DepthStencilState.DepthEnable = depth_test; + m_desc.DepthStencilState.DepthWriteMask = depth_write ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + m_desc.DepthStencilState.DepthFunc = compare_op; +} + +void D3D12::GraphicsPipelineBuilder::SetStencilState(bool stencil_test, u8 read_mask, u8 write_mask, + const D3D12_DEPTH_STENCILOP_DESC& front, + const D3D12_DEPTH_STENCILOP_DESC& back) +{ + m_desc.DepthStencilState.StencilEnable = stencil_test; + m_desc.DepthStencilState.StencilReadMask = read_mask; + m_desc.DepthStencilState.StencilWriteMask = write_mask; + m_desc.DepthStencilState.FrontFace = front; + m_desc.DepthStencilState.BackFace = back; +} + +void D3D12::GraphicsPipelineBuilder::SetNoDepthTestState() +{ + SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); +} + +void D3D12::GraphicsPipelineBuilder::SetNoStencilState() +{ + D3D12_DEPTH_STENCILOP_DESC empty = {}; + SetStencilState(false, 0, 0, empty, empty); +} + +void D3D12::GraphicsPipelineBuilder::SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, + D3D12_BLEND dst_factor, D3D12_BLEND_OP op, + D3D12_BLEND alpha_src_factor, D3D12_BLEND alpha_dst_factor, + D3D12_BLEND_OP alpha_op, u8 write_mask /*= 0xFF*/) +{ + m_desc.BlendState.RenderTarget[rt].BlendEnable = blend_enable; + m_desc.BlendState.RenderTarget[rt].SrcBlend = src_factor; + m_desc.BlendState.RenderTarget[rt].DestBlend = dst_factor; + m_desc.BlendState.RenderTarget[rt].BlendOp = op; + m_desc.BlendState.RenderTarget[rt].SrcBlendAlpha = alpha_src_factor; + m_desc.BlendState.RenderTarget[rt].DestBlendAlpha = alpha_dst_factor; + m_desc.BlendState.RenderTarget[rt].BlendOpAlpha = alpha_op; + m_desc.BlendState.RenderTarget[rt].RenderTargetWriteMask = write_mask; + + if (rt > 0) + m_desc.BlendState.IndependentBlendEnable = TRUE; +} + +void D3D12::GraphicsPipelineBuilder::SetColorWriteMask(u32 rt, 
u8 write_mask /* = D3D12_COLOR_WRITE_ENABLE_ALL */) +{ + m_desc.BlendState.RenderTarget[rt].RenderTargetWriteMask = write_mask; +} + +void D3D12::GraphicsPipelineBuilder::SetNoBlendingState() +{ + SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, + D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); + m_desc.BlendState.IndependentBlendEnable = FALSE; +} + +void D3D12::GraphicsPipelineBuilder::ClearRenderTargets() +{ + m_desc.NumRenderTargets = 0; + for (u32 i = 0; i < sizeof(m_desc.RTVFormats) / sizeof(m_desc.RTVFormats[0]); i++) + m_desc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; +} + +void D3D12::GraphicsPipelineBuilder::SetRenderTarget(u32 rt, DXGI_FORMAT format) +{ + m_desc.RTVFormats[rt] = format; + if (rt >= m_desc.NumRenderTargets) + m_desc.NumRenderTargets = rt + 1; +} + +void D3D12::GraphicsPipelineBuilder::ClearDepthStencilFormat() +{ + m_desc.DSVFormat = DXGI_FORMAT_UNKNOWN; +} + +void D3D12::GraphicsPipelineBuilder::SetDepthStencilFormat(DXGI_FORMAT format) +{ + m_desc.DSVFormat = format; +} + +D3D12::ComputePipelineBuilder::ComputePipelineBuilder() +{ + Clear(); +} + +void D3D12::ComputePipelineBuilder::Clear() +{ + std::memset(&m_desc, 0, sizeof(m_desc)); +} + +Microsoft::WRL::ComPtr D3D12::ComputePipelineBuilder::Create(ID3D12Device* device, + bool clear /*= true*/) +{ + Microsoft::WRL::ComPtr ps; + HRESULT hr = device->CreateComputePipelineState(&m_desc, IID_PPV_ARGS(ps.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateComputePipelineState() failed: %08X", hr); + return {}; + } + + if (clear) + Clear(); + + return ps; +} + +void D3D12::ComputePipelineBuilder::SetRootSignature(ID3D12RootSignature* rs) +{ + m_desc.pRootSignature = rs; +} + +void D3D12::ComputePipelineBuilder::SetShader(const void* data, u32 data_size) +{ + m_desc.CS.pShaderBytecode = data; + m_desc.CS.BytecodeLength = data_size; +} + +D3D12::RootSignatureBuilder::RootSignatureBuilder() +{ + Clear(); +} + +void 
D3D12::RootSignatureBuilder::Clear() +{ + m_desc = {}; + m_desc.pParameters = m_params.data(); + m_params = {}; + m_descriptor_ranges = {}; + m_num_descriptor_ranges = 0; +} + +Microsoft::WRL::ComPtr D3D12::RootSignatureBuilder::Create(bool clear /*= true*/) +{ + Microsoft::WRL::ComPtr rs = D3D12Device::GetInstance().CreateRootSignature(&m_desc); + if (!rs) + return {}; + + if (clear) + Clear(); + + return rs; +} + +void D3D12::RootSignatureBuilder::SetInputAssemblerFlag() +{ + m_desc.Flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; +} + +u32 D3D12::RootSignatureBuilder::Add32BitConstants(u32 shader_reg, u32 num_values, D3D12_SHADER_VISIBILITY visibility) +{ + const u32 index = m_desc.NumParameters++; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + m_params[index].ShaderVisibility = visibility; + m_params[index].Constants.ShaderRegister = shader_reg; + m_params[index].Constants.RegisterSpace = 0; + m_params[index].Constants.Num32BitValues = num_values; + + return index; +} + +u32 D3D12::RootSignatureBuilder::AddCBVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) +{ + const u32 index = m_desc.NumParameters++; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + m_params[index].ShaderVisibility = visibility; + m_params[index].Descriptor.ShaderRegister = shader_reg; + m_params[index].Descriptor.RegisterSpace = 0; + + return index; +} + +u32 D3D12::RootSignatureBuilder::AddSRVParameter(u32 shader_reg, D3D12_SHADER_VISIBILITY visibility) +{ + const u32 index = m_desc.NumParameters++; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + m_params[index].ShaderVisibility = visibility; + m_params[index].Descriptor.ShaderRegister = shader_reg; + m_params[index].Descriptor.RegisterSpace = 0; + + return index; +} + +u32 D3D12::RootSignatureBuilder::AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE rt, u32 start_shader_reg, + u32 num_shader_regs, D3D12_SHADER_VISIBILITY visibility) 
+{ + const u32 index = m_desc.NumParameters++; + const u32 dr_index = m_num_descriptor_ranges++; + + m_descriptor_ranges[dr_index].RangeType = rt; + m_descriptor_ranges[dr_index].NumDescriptors = num_shader_regs; + m_descriptor_ranges[dr_index].BaseShaderRegister = start_shader_reg; + m_descriptor_ranges[dr_index].RegisterSpace = 0; + m_descriptor_ranges[dr_index].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + m_params[index].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + m_params[index].DescriptorTable.pDescriptorRanges = &m_descriptor_ranges[dr_index]; + m_params[index].DescriptorTable.NumDescriptorRanges = 1; + m_params[index].ShaderVisibility = visibility; + + return index; +} + +#ifdef _DEBUG + +void D3D12::SetObjectName(ID3D12Object* object, const std::string_view& name) +{ + object->SetName(StringUtil::UTF8StringToWideString(name).c_str()); +} + +void D3D12::SetObjectNameFormatted(ID3D12Object* object, const char* format, ...) +{ + std::va_list ap; + va_start(ap, format); + SetObjectName(object, StringUtil::StdStringFromFormatV(format, ap).c_str()); + va_end(ap); +} + +#endif diff --git a/src/common/d3d12/util.h b/src/util/d3d12_builders.h similarity index 66% rename from src/common/d3d12/util.h rename to src/util/d3d12_builders.h index 11f2c1986..657c413cb 100644 --- a/src/common/d3d12/util.h +++ b/src/util/d3d12_builders.h @@ -1,47 +1,17 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "../types.h" -#include "../windows_headers.h" + +#include "common/types.h" +#include "common/windows_headers.h" + #include #include +#include #include namespace D3D12 { - -class ShaderCache; - -void ResourceBarrier(ID3D12GraphicsCommandList* cmdlist, ID3D12Resource* resource, D3D12_RESOURCE_STATES from_state, - D3D12_RESOURCE_STATES to_state); - -void 
SetViewport(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, float min_depth = 0.0f, - float max_depth = 1.0f); - -void SetScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height); - -void SetViewportAndScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, - float min_depth = 0.0f, float max_depth = 1.0f); - -void SetViewportAndClampScissor(ID3D12GraphicsCommandList* cmdlist, int x, int y, int width, int height, - float min_depth = 0.0f, float max_depth = 1.0f); - -u32 GetTexelSize(DXGI_FORMAT format); - -void SetDefaultSampler(D3D12_SAMPLER_DESC* desc); - -#ifdef _DEBUG - -void SetObjectName(ID3D12Object* object, const char* name); -void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...); - -#else - -static inline void SetObjectName(ID3D12Object* object, const char* name) {} -static inline void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...) {} - -#endif - class RootSignatureBuilder { public: @@ -84,10 +54,11 @@ public: ~GraphicsPipelineBuilder() = default; + ALWAYS_INLINE const D3D12_GRAPHICS_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; } + void Clear(); Microsoft::WRL::ComPtr Create(ID3D12Device* device, bool clear = true); - Microsoft::WRL::ComPtr Create(ID3D12Device* device, ShaderCache& cache, bool clear = true); void SetRootSignature(ID3D12RootSignature* rs); @@ -95,9 +66,9 @@ public: void SetGeometryShader(const void* data, u32 data_size); void SetPixelShader(const void* data, u32 data_size); - void SetVertexShader(ID3DBlob* blob); - void SetGeometryShader(ID3DBlob* blob); - void SetPixelShader(ID3DBlob* blob); + void SetVertexShader(const ID3DBlob* blob); + void SetGeometryShader(const ID3DBlob* blob); + void SetPixelShader(const ID3DBlob* blob); void AddVertexAttribute(const char* semantic_name, u32 semantic_index, DXGI_FORMAT format, u32 buffer, u32 offset); @@ -110,12 +81,16 @@ public: void SetNoCullRasterizationState(); void 
SetDepthState(bool depth_test, bool depth_write, D3D12_COMPARISON_FUNC compare_op); + void SetStencilState(bool stencil_test, u8 read_mask, u8 write_mask, const D3D12_DEPTH_STENCILOP_DESC& front, + const D3D12_DEPTH_STENCILOP_DESC& back); void SetNoDepthTestState(); + void SetNoStencilState(); void SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, D3D12_BLEND dst_factor, D3D12_BLEND_OP op, D3D12_BLEND alpha_src_factor, D3D12_BLEND alpha_dst_factor, D3D12_BLEND_OP alpha_op, u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL); + void SetColorWriteMask(u32 rt, u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL); void SetNoBlendingState(); @@ -128,8 +103,37 @@ public: void SetDepthStencilFormat(DXGI_FORMAT format); private: - D3D12_GRAPHICS_PIPELINE_STATE_DESC m_desc{}; - std::array m_input_elements{}; + D3D12_GRAPHICS_PIPELINE_STATE_DESC m_desc; + std::array m_input_elements; }; -} // namespace D3D12 \ No newline at end of file +class ComputePipelineBuilder +{ +public: + ComputePipelineBuilder(); + ~ComputePipelineBuilder() = default; + + void Clear(); + + Microsoft::WRL::ComPtr Create(ID3D12Device* device, bool clear = true); + + void SetRootSignature(ID3D12RootSignature* rs); + + void SetShader(const void* data, u32 data_size); + +private: + D3D12_COMPUTE_PIPELINE_STATE_DESC m_desc; +}; + +#ifdef _DEBUG +void SetObjectName(ID3D12Object* object, const std::string_view& name); +void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...); +#else +static inline void SetObjectName(ID3D12Object* object, const std::string_view& name) +{ +} +static inline void SetObjectNameFormatted(ID3D12Object* object, const char* format, ...) 
+{ +} +#endif +} // namespace D3D12 diff --git a/src/util/d3d12_descriptor_heap_manager.cpp b/src/util/d3d12_descriptor_heap_manager.cpp new file mode 100644 index 000000000..8ebb3ed05 --- /dev/null +++ b/src/util/d3d12_descriptor_heap_manager.cpp @@ -0,0 +1,154 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d12_descriptor_heap_manager.h" + +#include "common/assert.h" +#include "common/log.h" + +Log_SetChannel(D3D12Device); + +D3D12DescriptorHeapManager::D3D12DescriptorHeapManager() = default; +D3D12DescriptorHeapManager::~D3D12DescriptorHeapManager() = default; + +bool D3D12DescriptorHeapManager::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors, + bool shader_visible) +{ + D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast(num_descriptors), + shader_visible ? D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE : + D3D12_DESCRIPTOR_HEAP_FLAG_NONE}; + + HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(m_descriptor_heap.ReleaseAndGetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateDescriptorHeap() failed: %08X", hr); + return false; + } + + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + if (shader_visible) + m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); + + m_num_descriptors = num_descriptors; + m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type); + m_shader_visible = shader_visible; + + // Set all slots to unallocated (1) + const u32 bitset_count = num_descriptors / BITSET_SIZE + (((num_descriptors % BITSET_SIZE) != 0) ? 
1 : 0); + m_free_slots.resize(bitset_count); + for (BitSetType& bs : m_free_slots) + bs.flip(); + + return true; +} + +void D3D12DescriptorHeapManager::Destroy() +{ + for (BitSetType& bs : m_free_slots) + { + DebugAssert(bs.all()); + } + + m_shader_visible = false; + m_num_descriptors = 0; + m_descriptor_increment_size = 0; + m_heap_base_cpu = {}; + m_heap_base_gpu = {}; + m_descriptor_heap.Reset(); + m_free_slots.clear(); +} + +bool D3D12DescriptorHeapManager::Allocate(D3D12DescriptorHandle* handle) +{ + // Start past the temporary slots, no point in searching those. + for (u32 group = 0; group < m_free_slots.size(); group++) + { + BitSetType& bs = m_free_slots[group]; + if (bs.none()) + continue; + + u32 bit = 0; + for (; bit < BITSET_SIZE; bit++) + { + if (bs[bit]) + break; + } + + u32 index = group * BITSET_SIZE + bit; + bs[bit] = false; + + handle->index = index; + handle->cpu_handle.ptr = m_heap_base_cpu.ptr + index * m_descriptor_increment_size; + handle->gpu_handle.ptr = m_shader_visible ? 
(m_heap_base_gpu.ptr + index * m_descriptor_increment_size) : 0; + return true; + } + + Panic("Out of fixed descriptors"); + return false; +} + +void D3D12DescriptorHeapManager::Free(u32 index) +{ + DebugAssert(index < m_num_descriptors); + + u32 group = index / BITSET_SIZE; + u32 bit = index % BITSET_SIZE; + m_free_slots[group][bit] = true; +} + +void D3D12DescriptorHeapManager::Free(D3D12DescriptorHandle* handle) +{ + if (handle->index == D3D12DescriptorHandle::INVALID_INDEX) + return; + + Free(handle->index); + handle->Clear(); +} + +D3D12DescriptorAllocator::D3D12DescriptorAllocator() = default; +D3D12DescriptorAllocator::~D3D12DescriptorAllocator() = default; + +bool D3D12DescriptorAllocator::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors) +{ + const D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast(num_descriptors), + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE}; + const HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(m_descriptor_heap.ReleaseAndGetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateDescriptorHeap() failed: %08X", hr); + return false; + } + + m_num_descriptors = num_descriptors; + m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type); + m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart(); + m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart(); + return true; +} + +void D3D12DescriptorAllocator::Destroy() +{ + m_descriptor_heap.Reset(); + m_descriptor_increment_size = 0; + m_num_descriptors = 0; + m_current_offset = 0; + m_heap_base_cpu = {}; + m_heap_base_gpu = {}; +} + +bool D3D12DescriptorAllocator::Allocate(u32 num_handles, D3D12DescriptorHandle* out_base_handle) +{ + if ((m_current_offset + num_handles) > m_num_descriptors) + return false; + + out_base_handle->index = m_current_offset; + out_base_handle->cpu_handle.ptr = m_heap_base_cpu.ptr + m_current_offset * m_descriptor_increment_size; + 
out_base_handle->gpu_handle.ptr = m_heap_base_gpu.ptr + m_current_offset * m_descriptor_increment_size; + m_current_offset += num_handles; + return true; +} + +void D3D12DescriptorAllocator::Reset() +{ + m_current_offset = 0; +} diff --git a/src/util/d3d12_descriptor_heap_manager.h b/src/util/d3d12_descriptor_heap_manager.h new file mode 100644 index 000000000..28e54d09c --- /dev/null +++ b/src/util/d3d12_descriptor_heap_manager.h @@ -0,0 +1,244 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "common/hash_combine.h" +#include "common/types.h" +#include "common/windows_headers.h" + +#include +#include +#include +#include +#include +#include + +// This class provides an abstraction for D3D12 descriptor heaps. +struct D3D12DescriptorHandle final +{ + enum : u32 + { + INVALID_INDEX = 0xFFFFFFFF + }; + + D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle{}; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle{}; + u32 index = INVALID_INDEX; + + ALWAYS_INLINE operator bool() const { return index != INVALID_INDEX; } + + ALWAYS_INLINE operator D3D12_CPU_DESCRIPTOR_HANDLE() const { return cpu_handle; } + ALWAYS_INLINE operator D3D12_GPU_DESCRIPTOR_HANDLE() const { return gpu_handle; } + + ALWAYS_INLINE bool operator==(const D3D12DescriptorHandle& rhs) const { return (index == rhs.index); } + ALWAYS_INLINE bool operator!=(const D3D12DescriptorHandle& rhs) const { return (index != rhs.index); } + ALWAYS_INLINE bool operator<(const D3D12DescriptorHandle& rhs) const { return (index < rhs.index); } + ALWAYS_INLINE bool operator<=(const D3D12DescriptorHandle& rhs) const { return (index <= rhs.index); } + ALWAYS_INLINE bool operator>(const D3D12DescriptorHandle& rhs) const { return (index > rhs.index); } + ALWAYS_INLINE bool operator>=(const D3D12DescriptorHandle& rhs) const { return (index >= rhs.index); } + + ALWAYS_INLINE void Clear() + { + cpu_handle = {}; + gpu_handle = {}; + index = INVALID_INDEX; + } 
+}; + +class D3D12DescriptorHeapManager final +{ +public: + D3D12DescriptorHeapManager(); + ~D3D12DescriptorHeapManager(); + + ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } + u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; } + + bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors, bool shader_visible); + void Destroy(); + + bool Allocate(D3D12DescriptorHandle* handle); + void Free(D3D12DescriptorHandle* handle); + void Free(u32 index); + +private: + Microsoft::WRL::ComPtr m_descriptor_heap; + u32 m_num_descriptors = 0; + u32 m_descriptor_increment_size = 0; + bool m_shader_visible = false; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {}; + + static constexpr u32 BITSET_SIZE = 1024; + using BitSetType = std::bitset; + std::vector m_free_slots = {}; +}; + +class D3D12DescriptorAllocator +{ +public: + D3D12DescriptorAllocator(); + ~D3D12DescriptorAllocator(); + + ALWAYS_INLINE ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); } + ALWAYS_INLINE u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; } + + bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors); + void Destroy(); + + bool Allocate(u32 num_handles, D3D12DescriptorHandle* out_base_handle); + void Reset(); + +private: + Microsoft::WRL::ComPtr m_descriptor_heap; + u32 m_descriptor_increment_size = 0; + u32 m_num_descriptors = 0; + u32 m_current_offset = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {}; + D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {}; +}; + +template +class D3D12GroupedSamplerAllocator : private D3D12DescriptorAllocator +{ + struct Key + { + u32 idx[NumSamplers]; + + ALWAYS_INLINE bool operator==(const Key& rhs) const { return (std::memcmp(idx, rhs.idx, sizeof(idx)) == 0); } + ALWAYS_INLINE bool operator!=(const Key& rhs) const { return 
(std::memcmp(idx, rhs.idx, sizeof(idx)) != 0); } + }; + + struct KeyHash + { + ALWAYS_INLINE std::size_t operator()(const Key& key) const + { + size_t seed = 0; + for (u32 i : key.idx) + hash_combine(seed, i); + return seed; + } + }; + +public: + D3D12GroupedSamplerAllocator(); + ~D3D12GroupedSamplerAllocator(); + + using D3D12DescriptorAllocator::GetDescriptorHeap; + using D3D12DescriptorAllocator::GetDescriptorIncrementSize; + + bool Create(ID3D12Device* device, u32 num_descriptors); + void Destroy(); + + bool LookupSingle(ID3D12Device* device, D3D12DescriptorHandle* gpu_handle, const D3D12DescriptorHandle& cpu_handle); + bool LookupGroup(ID3D12Device* device, D3D12DescriptorHandle* gpu_handle, const D3D12DescriptorHandle* cpu_handles); + + // Clears cache but doesn't reset allocator. + void InvalidateCache(); + + void Reset(); + bool ShouldReset() const; + +private: + std::unordered_map m_groups; +}; + +template +D3D12GroupedSamplerAllocator::D3D12GroupedSamplerAllocator() = default; + +template +D3D12GroupedSamplerAllocator::~D3D12GroupedSamplerAllocator() = default; + +template +bool D3D12GroupedSamplerAllocator::Create(ID3D12Device* device, u32 num_descriptors) +{ + return D3D12DescriptorAllocator::Create(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, num_descriptors); +} + +template +void D3D12GroupedSamplerAllocator::Destroy() +{ + D3D12DescriptorAllocator::Destroy(); +} + +template +void D3D12GroupedSamplerAllocator::Reset() +{ + m_groups.clear(); + D3D12DescriptorAllocator::Reset(); +} + +template +void D3D12GroupedSamplerAllocator::InvalidateCache() +{ + m_groups.clear(); +} + +template +bool D3D12GroupedSamplerAllocator::LookupSingle(ID3D12Device* device, D3D12DescriptorHandle* gpu_handle, + const D3D12DescriptorHandle& cpu_handle) +{ + Key key; + key.idx[0] = cpu_handle.index; + for (u32 i = 1; i < NumSamplers; i++) + key.idx[i] = 0; + + auto it = m_groups.find(key); + if (it != m_groups.end()) + { + *gpu_handle = it->second; + return true; + } + + if 
(!Allocate(1, gpu_handle)) + return false; + + device->CopyDescriptorsSimple(1, *gpu_handle, cpu_handle, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + m_groups.emplace(key, *gpu_handle); + return true; +} + +template +bool D3D12GroupedSamplerAllocator::LookupGroup(ID3D12Device* device, D3D12DescriptorHandle* gpu_handle, + const D3D12DescriptorHandle* cpu_handles) +{ + Key key; + for (u32 i = 0; i < NumSamplers; i++) + key.idx[i] = cpu_handles[i].index; + + auto it = m_groups.find(key); + if (it != m_groups.end()) + { + *gpu_handle = it->second; + return true; + } + + if (!Allocate(NumSamplers, gpu_handle)) + return false; + + D3D12_CPU_DESCRIPTOR_HANDLE dst_handle = *gpu_handle; + UINT dst_size = NumSamplers; + D3D12_CPU_DESCRIPTOR_HANDLE src_handles[NumSamplers]; + UINT src_sizes[NumSamplers]; + for (u32 i = 0; i < NumSamplers; i++) + { + src_handles[i] = cpu_handles[i]; + src_sizes[i] = 1; + } + device->CopyDescriptors(1, &dst_handle, &dst_size, NumSamplers, src_handles, src_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + m_groups.emplace(key, *gpu_handle); + return true; +} + +template +bool D3D12GroupedSamplerAllocator::ShouldReset() const +{ + // We only reset the sampler heap if more than half of the descriptors are used. + // This saves descriptor copying when there isn't a large number of sampler configs per frame. 
+ return m_groups.size() >= (D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE / 2); +} diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp new file mode 100644 index 000000000..e881906a1 --- /dev/null +++ b/src/util/d3d12_device.cpp @@ -0,0 +1,2117 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d12_device.h" +#include "d3d12_builders.h" +#include "d3d12_pipeline.h" +#include "d3d12_stream_buffer.h" +#include "d3d12_texture.h" +#include "d3d_common.h" +#include "postprocessing_chain.h" // TODO: Remove me + +#include "core/host.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/bitutils.h" +#include "common/file_system.h" +#include "common/log.h" +#include "common/path.h" +#include "common/scoped_guard.h" +#include "common/string.h" +#include "common/string_util.h" + +#include "D3D12MemAlloc.h" +#include "fmt/format.h" + +#include +#include + +Log_SetChannel(D3D12Device); + +// Tweakables +enum : u32 +{ + MIN_TEXEL_BUFFER_ELEMENTS = 1024 * 512, + + MAX_DRAW_CALLS_PER_FRAME = 2048, + MAX_DESCRIPTORS_PER_FRAME = 32768, + MAX_SAMPLERS_PER_FRAME = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, + + MAX_PERSISTENT_DESCRIPTORS = 2048, + MAX_PERSISTENT_RTVS = 512, + MAX_PERSISTENT_DSVS = 128, + MAX_PERSISTENT_SAMPLERS = 512, + + VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, + INDEX_BUFFER_SIZE = 16 * 1024 * 1024, + VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, + + // UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + UNIFORM_PUSH_CONSTANTS_SIZE = 128, + + MAX_UNIFORM_BUFFER_SIZE = 1024, +}; + +// We need to synchronize instance creation because of adapter enumeration from the UI thread. 
+static std::mutex s_instance_mutex; + +static constexpr D3D12_CLEAR_VALUE s_present_clear_color = {DXGI_FORMAT_R8G8B8A8_UNORM, {0.0f, 0.0f, 0.0f, 1.0f}}; +static constexpr GPUTexture::Format s_swap_chain_format = GPUTexture::Format::RGBA8; + +// We just need to keep this alive, never reference it. +static std::vector s_pipeline_cache_data; + +#ifdef _DEBUG +#include "WinPixEventRuntime/pix3.h" +static u32 s_debug_scope_depth = 0; +#endif + +D3D12Device::D3D12Device() +{ +#ifdef _DEBUG + s_debug_scope_depth = 0; +#endif +} + +D3D12Device::~D3D12Device() +{ + Assert(!m_device); + Assert(s_pipeline_cache_data.empty()); +} + +static constexpr u32 GetActiveTexturesForLayout(GPUPipeline::Layout layout) +{ + constexpr std::array(GPUPipeline::Layout::MaxCount)> counts = { + 1, // SingleTextureAndUBO + 1, // SingleTextureAndPushConstants + 0, // SingleTextureBufferAndPushConstants + GPUDevice::MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO + GPUDevice::MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants + }; + + return counts[static_cast(layout)]; +} + +D3D12Device::ComPtr D3D12Device::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc) +{ + ComPtr blob; + ComPtr error_blob; + const HRESULT hr = + D3D12SerializeRootSignature(desc, D3D_ROOT_SIGNATURE_VERSION_1, blob.GetAddressOf(), error_blob.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("D3D12SerializeRootSignature() failed: %08X", hr); + if (error_blob) + Log_ErrorPrintf("%s", error_blob->GetBufferPointer()); + + return {}; + } + + return blob; +} + +D3D12Device::ComPtr D3D12Device::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc) +{ + ComPtr blob = SerializeRootSignature(desc); + if (!blob) + return {}; + + ComPtr rs; + const HRESULT hr = + m_device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(rs.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateRootSignature() failed: %08X", hr); + return {}; + } + + return rs; +} + +bool 
D3D12Device::CreateDevice(const std::string_view& adapter, bool threaded_presentation) +{ + std::unique_lock lock(s_instance_mutex); + + m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device); + if (!m_dxgi_factory) + return false; + + m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter); + + HRESULT hr; + + // Enabling the debug layer will fail if the Graphics Tools feature is not installed. + if (m_debug_device) + { + ComPtr debug12; + hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.GetAddressOf())); + if (SUCCEEDED(hr)) + { + debug12->EnableDebugLayer(); + } + else + { + Log_ErrorPrintf("Debug layer requested but not available."); + m_debug_device = false; + } + } + + // Create the actual device. + m_feature_level = D3D_FEATURE_LEVEL_11_0; + hr = D3D12CreateDevice(m_adapter.Get(), m_feature_level, IID_PPV_ARGS(&m_device)); + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create D3D12 device: %08X", hr); + return false; + } + + if (!m_adapter) + { + const LUID luid(m_device->GetAdapterLuid()); + if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.GetAddressOf())))) + Log_ErrorPrintf("Failed to get lookup adapter by device LUID"); + } + + if (m_debug_device) + { + ComPtr info_queue; + if (SUCCEEDED(m_device.As(&info_queue))) + { + if (IsDebuggerPresent()) + { + info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); + info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); + } + + D3D12_INFO_QUEUE_FILTER filter = {}; + std::array id_list{ + D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, + D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, + D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, + D3D12_MESSAGE_ID_LOADPIPELINE_NAMENOTFOUND, + }; + filter.DenyList.NumIDs = static_cast(id_list.size()); + filter.DenyList.pIDList = id_list.data(); + 
info_queue->PushStorageFilter(&filter); + } + } + + const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, + D3D12_COMMAND_QUEUE_FLAG_NONE}; + hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue)); + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create command queue: %08X", hr); + return false; + } + + D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; + allocatorDesc.pDevice = m_device.Get(); + allocatorDesc.pAdapter = m_adapter.Get(); + allocatorDesc.Flags = + D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED | + D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/; + + hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("D3D12MA::CreateAllocator() failed with HRESULT %08X", hr); + return false; + } + + hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)); + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to create fence: %08X", hr); + return false; + } + + m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); + if (m_fence_event == NULL) + { + Log_ErrorPrintf("Failed to create fence event: %08X", GetLastError()); + return false; + } + + SetFeatures(); + + if (!CreateCommandLists() || !CreateDescriptorHeaps()) + return false; + + if (!m_window_info.IsSurfaceless() && !CreateSwapChain()) + return false; + + if (!CreateRootSignatures() || !CreateBuffers()) + return false; + + CreateTimestampQuery(); + return true; +} + +void D3D12Device::DestroyDevice() +{ + std::unique_lock lock(s_instance_mutex); + + // Toss command list if we're recording... 
+ if (InRenderPass()) + EndRenderPass(); + + WaitForGPUIdle(); + + DestroyDeferredObjects(m_current_fence_value); + DestroyDownloadBuffer(); + DestroySamplers(); + DestroyTimestampQuery(); + DestroyBuffers(); + DestroyDescriptorHeaps(); + DestroyRootSignatures(); + DestroySwapChain(); + DestroyCommandLists(); + + m_pipeline_library.Reset(); + std::vector().swap(s_pipeline_cache_data); + m_fence.Reset(); + if (m_fence_event != NULL) + { + CloseHandle(m_fence_event); + m_fence_event = NULL; + } + + m_allocator.Reset(); + m_command_queue.Reset(); + m_device.Reset(); + m_adapter.Reset(); + m_dxgi_factory.Reset(); +} + +bool D3D12Device::ReadPipelineCache(const std::string& filename) +{ + std::optional> data; + + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "rb"); + if (fp) + data = FileSystem::ReadBinaryFile(fp.get()); + + const HRESULT hr = + m_device->CreatePipelineLibrary(data.has_value() ? data->data() : nullptr, data.has_value() ? data->size() : 0, + IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf())); + if (FAILED(hr)) + Log_WarningPrintf("CreatePipelineLibrary() failed with HRESULT %08X, pipeline caching will not be available.", hr); + else if (data.has_value()) + s_pipeline_cache_data = std::move(data.value()); + + return SUCCEEDED(hr); +} + +bool D3D12Device::GetPipelineCacheData(DynamicHeapArray* data) +{ + if (!m_pipeline_library) + return false; + + const size_t size = m_pipeline_library->GetSerializedSize(); + if (size == 0) + { + Log_WarningPrintf("Empty serialized pipeline state returned."); + return false; + } + + data->resize(size); + const HRESULT hr = m_pipeline_library->Serialize(data->data(), data->size()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Serialize() failed with HRESULT %08X", hr); + data->deallocate(); + return false; + } + + return true; +} + +bool D3D12Device::CreateCommandLists() +{ + for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) + { + CommandList& res = m_command_lists[i]; + HRESULT hr; + + for (u32 j = 0; j < 2; j++) 
+ { + hr = m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(res.command_allocators[j].GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateCommandAllocator() failed: %08X", hr); + return false; + } + + hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[j].Get(), nullptr, + IID_PPV_ARGS(res.command_lists[j].GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateCommandList() failed: %08X", hr); + return false; + } + + // Close the command lists, since the first thing we do is reset them. + hr = res.command_lists[j]->Close(); + if (FAILED(hr)) + { + Log_ErrorPrintf("Close() failed: %08X", hr); + return false; + } + } + + if (!res.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + MAX_DESCRIPTORS_PER_FRAME)) + { + Log_ErrorPrintf("Failed to create per frame descriptor allocator"); + return false; + } + + if (!res.sampler_allocator.Create(m_device.Get(), MAX_SAMPLERS_PER_FRAME)) + { + Log_ErrorPrintf("Failed to create per frame sampler allocator"); + return false; + } + } + + MoveToNextCommandList(); + return true; +} + +void D3D12Device::MoveToNextCommandList() +{ + m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + m_current_fence_value++; + + // We may have to wait if this command list hasn't finished on the GPU. + CommandList& res = m_command_lists[m_current_command_list]; + WaitForFence(res.fence_counter); + res.fence_counter = m_current_fence_value; + res.init_list_used = false; + + // Begin command list. + res.command_allocators[1]->Reset(); + res.command_lists[1]->Reset(res.command_allocators[1].Get(), nullptr); + res.descriptor_allocator.Reset(); + if (res.sampler_allocator.ShouldReset()) + res.sampler_allocator.Reset(); + + if (res.has_timestamp_query) + { + // readback timestamp from the last time this cmdlist was used. 
+ // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist. + const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); + const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)}; + void* map; + HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map); + if (SUCCEEDED(hr)) + { + u64 timestamps[2]; + std::memcpy(timestamps, static_cast(map) + offset, sizeof(timestamps)); + m_accumulated_gpu_time += + static_cast(static_cast(timestamps[1] - timestamps[0]) / m_timestamp_frequency); + + const D3D12_RANGE write_range = {}; + m_timestamp_query_buffer->Unmap(0, &write_range); + } + else + { + Log_WarningPrintf("Map() for timestamp query failed: %08X", hr); + } + } + + res.has_timestamp_query = m_gpu_timing_enabled; + if (m_gpu_timing_enabled) + { + res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, + m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST); + } + + ID3D12DescriptorHeap* heaps[2] = {res.descriptor_allocator.GetDescriptorHeap(), + res.sampler_allocator.GetDescriptorHeap()}; + res.command_lists[1]->SetDescriptorHeaps(static_cast(std::size(heaps)), heaps); + + m_allocator->SetCurrentFrameIndex(static_cast(m_current_fence_value)); + InvalidateCachedState(); +} + +void D3D12Device::DestroyCommandLists() +{ + for (CommandList& resources : m_command_lists) + { + resources.descriptor_allocator.Destroy(); + resources.sampler_allocator.Destroy(); + for (u32 i = 0; i < 2; i++) + { + resources.command_lists[i].Reset(); + resources.command_allocators[i].Reset(); + } + } +} + +bool D3D12Device::CreateDescriptorHeaps() +{ + if (!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + MAX_PERSISTENT_DESCRIPTORS, false) || + !m_rtv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_PERSISTENT_RTVS, false) || + 
!m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_PERSISTENT_DSVS, false) || + !m_sampler_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_PERSISTENT_SAMPLERS, + false)) + { + return false; + } + + // Allocate null SRV descriptor for unbound textures. + constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + + if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor)) + { + Log_ErrorPrint("Failed to allocate null descriptor"); + return false; + } + + m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle); + + // Same for samplers. + m_point_sampler = GetSampler(GPUSampler::GetNearestConfig()); + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + m_current_samplers[i] = m_point_sampler; + return true; +} + +void D3D12Device::DestroyDescriptorHeaps() +{ + if (m_null_srv_descriptor) + m_descriptor_heap_manager.Free(&m_null_srv_descriptor); + m_sampler_heap_manager.Destroy(); + m_dsv_heap_manager.Destroy(); + m_rtv_heap_manager.Destroy(); + m_descriptor_heap_manager.Destroy(); +} + +ID3D12GraphicsCommandList4* D3D12Device::GetInitCommandList() +{ + CommandList& res = m_command_lists[m_current_command_list]; + if (!res.init_list_used) + { + HRESULT hr = res.command_allocators[0]->Reset(); + AssertMsg(SUCCEEDED(hr), "Reset init command allocator failed"); + + hr = res.command_lists[0]->Reset(res.command_allocators[0].Get(), nullptr); + AssertMsg(SUCCEEDED(hr), "Reset init command list failed"); + res.init_list_used = true; + } + + return res.command_lists[0].Get(); +} + +void D3D12Device::SubmitCommandList(bool wait_for_completion) +{ + CommandList& res = m_command_lists[m_current_command_list]; + HRESULT hr; + + if (res.has_timestamp_query) + { + // write the timestamp back at the end of the cmdlist + res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), 
D3D12_QUERY_TYPE_TIMESTAMP, + (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1); + res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, + m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, + NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(), + m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); + } + + // TODO: error handling + if (res.init_list_used) + { + hr = res.command_lists[0]->Close(); + if (FAILED(hr)) + { + Log_ErrorPrintf("Closing init command list failed with HRESULT %08X", hr); + Panic("TODO cannot continue"); + } + } + + // Close and queue command list. + hr = res.command_lists[1]->Close(); + if (FAILED(hr)) + { + Log_ErrorPrintf("Closing main command list failed with HRESULT %08X", hr); + Panic("TODO cannot continue"); + } + + if (res.init_list_used) + { + const std::array execute_lists{res.command_lists[0].Get(), res.command_lists[1].Get()}; + m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); + } + else + { + const std::array execute_lists{res.command_lists[1].Get()}; + m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); + } + + // Update fence when GPU has completed. + hr = m_command_queue->Signal(m_fence.Get(), res.fence_counter); + DebugAssertMsg(SUCCEEDED(hr), "Signal fence"); + + MoveToNextCommandList(); + + if (wait_for_completion) + WaitForFence(res.fence_counter); +} + +void D3D12Device::SubmitCommandList(bool wait_for_completion, const char* reason, ...) 
+{ + std::va_list ap; + va_start(ap, reason); + const std::string reason_str(StringUtil::StdStringFromFormatV(reason, ap)); + va_end(ap); + + Log_WarningPrintf("Executing command buffer due to '%s'", reason_str.c_str()); + SubmitCommandList(wait_for_completion); +} + +void D3D12Device::SubmitCommandListAndRestartRenderPass(const char* reason) +{ + if (InRenderPass()) + EndRenderPass(); + + D3D12Framebuffer* fb = m_current_framebuffer; + D3D12Pipeline* pl = m_current_pipeline; + SubmitCommandList(false, "%s", reason); + + if (fb) + SetFramebuffer(fb); + SetPipeline(pl); + BeginRenderPass(); +} + +void D3D12Device::WaitForFence(u64 fence) +{ + if (m_completed_fence_value >= fence) + return; + + // Try non-blocking check. + m_completed_fence_value = m_fence->GetCompletedValue(); + if (m_completed_fence_value < fence) + { + // Fall back to event. + HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event); + AssertMsg(SUCCEEDED(hr), "Set fence event on completion"); + WaitForSingleObject(m_fence_event, INFINITE); + m_completed_fence_value = m_fence->GetCompletedValue(); + } + + // Release resources for as many command lists which have completed. 
+ DestroyDeferredObjects(m_completed_fence_value); +} + +void D3D12Device::WaitForGPUIdle() +{ + u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; + for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++) + { + WaitForFence(m_command_lists[index].fence_counter); + index = (index + 1) % NUM_COMMAND_LISTS; + } +} + +bool D3D12Device::CreateTimestampQuery() +{ + constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS; + constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT; + + const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT}; + HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateQueryHeap() for timestamp failed with %08X", hr); + m_features.gpu_timing = false; + return false; + } + + const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK}; + const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + BUFFER_SIZE, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + m_timestamp_query_allocation.GetAddressOf(), + IID_PPV_ARGS(m_timestamp_query_buffer.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateResource() for timestamp failed with %08X", hr); + m_features.gpu_timing = false; + return false; + } + + u64 frequency; + hr = m_command_queue->GetTimestampFrequency(&frequency); + if (FAILED(hr)) + { + Log_ErrorPrintf("GetTimestampFrequency() failed: %08X", hr); + m_features.gpu_timing = false; + return false; + } + + m_timestamp_frequency = static_cast(frequency) / 1000.0; + return true; +} + +void D3D12Device::DestroyTimestampQuery() +{ + m_timestamp_query_buffer.Reset(); + m_timestamp_query_allocation.Reset(); + m_timestamp_query_heap.Reset(); +} + +float 
D3D12Device::GetAndResetAccumulatedGPUTime() +{ + const float time = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return time; +} + +bool D3D12Device::SetGPUTimingEnabled(bool enabled) +{ + m_gpu_timing_enabled = enabled && m_features.gpu_timing; + return (enabled == m_gpu_timing_enabled); +} + +void D3D12Device::DeferObjectDestruction(ComPtr resource) +{ + DebugAssert(resource); + m_cleanup_resources.emplace_back(GetCurrentFenceValue(), + std::pair(nullptr, resource.Detach())); +} + +void D3D12Device::DeferResourceDestruction(ComPtr allocation, ComPtr resource) +{ + DebugAssert(allocation && resource); + m_cleanup_resources.emplace_back( + GetCurrentFenceValue(), std::pair(allocation.Detach(), resource.Detach())); +} + +void D3D12Device::DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor) +{ + DebugAssert(descriptor->index != D3D12DescriptorHandle::INVALID_INDEX); + m_cleanup_descriptors.emplace_back(GetCurrentFenceValue(), + std::pair(&heap, *descriptor)); + descriptor->Clear(); +} + +void D3D12Device::DestroyDeferredObjects(u64 fence_value) +{ + while (!m_cleanup_descriptors.empty()) + { + auto& it = m_cleanup_descriptors.front(); + if (it.first > fence_value) + break; + + it.second.first->Free(it.second.second.index); + m_cleanup_descriptors.pop_front(); + } + + while (!m_cleanup_resources.empty()) + { + auto& it = m_cleanup_resources.front(); + if (it.first > fence_value) + break; + + it.second.second->Release(); + if (it.second.first) + it.second.first->Release(); + m_cleanup_resources.pop_front(); + } +} + +void D3D12Device::GetAdapterAndModeList(AdapterAndModeList* ret, IDXGIFactory5* factory) +{ + ret->adapter_names = D3DCommon::GetAdapterNames(factory); + ret->fullscreen_modes = D3DCommon::GetFullscreenModes(factory, {}); +} + +GPUDevice::AdapterAndModeList D3D12Device::StaticGetAdapterAndModeList() +{ + AdapterAndModeList ret; + std::unique_lock lock(s_instance_mutex); + + // Device shouldn't be 
torn down since we have the lock. + if (g_gpu_device && g_gpu_device->GetRenderAPI() == RenderAPI::D3D12) + { + GetAdapterAndModeList(&ret, D3D12Device::GetInstance().m_dxgi_factory.Get()); + } + else + { + ComPtr factory = D3DCommon::CreateFactory(false); + if (factory) + GetAdapterAndModeList(&ret, factory.Get()); + } + + return ret; +} + +GPUDevice::AdapterAndModeList D3D12Device::GetAdapterAndModeList() +{ + AdapterAndModeList ret; + GetAdapterAndModeList(&ret, m_dxgi_factory.Get()); + return ret; +} + +RenderAPI D3D12Device::GetRenderAPI() const +{ + return RenderAPI::D3D12; +} + +bool D3D12Device::HasSurface() const +{ + return static_cast(m_swap_chain); +} + +bool D3D12Device::CreateSwapChain() +{ + if (m_window_info.type != WindowInfo::Type::Win32) + return false; + + const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(s_swap_chain_format); + + const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle); + RECT client_rc{}; + GetClientRect(window_hwnd, &client_rc); + + DXGI_MODE_DESC fullscreen_mode = {}; + ComPtr fullscreen_output; + if (Host::IsFullscreen()) + { + u32 fullscreen_width, fullscreen_height; + float fullscreen_refresh_rate; + m_is_exclusive_fullscreen = + GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) && + D3DCommon::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.Get(), client_rc, fullscreen_width, + fullscreen_height, fullscreen_refresh_rate, fm.resource_format, + &fullscreen_mode, fullscreen_output.GetAddressOf()); + } + else + { + m_is_exclusive_fullscreen = false; + } + + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; + swap_chain_desc.Width = static_cast(client_rc.right - client_rc.left); + swap_chain_desc.Height = static_cast(client_rc.bottom - client_rc.top); + swap_chain_desc.Format = fm.resource_format; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.BufferCount = 3; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + 
swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + + m_using_allow_tearing = (m_allow_tearing_supported && !m_is_exclusive_fullscreen); + if (m_using_allow_tearing) + swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + + HRESULT hr = S_OK; + + if (m_is_exclusive_fullscreen) + { + DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc; + DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {}; + + fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + fs_sd_desc.Width = fullscreen_mode.Width; + fs_sd_desc.Height = fullscreen_mode.Height; + fs_desc.RefreshRate = fullscreen_mode.RefreshRate; + fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering; + fs_desc.Scaling = fullscreen_mode.Scaling; + fs_desc.Windowed = FALSE; + + Log_VerbosePrintf("Creating a %dx%d exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height); + hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.Get(), window_hwnd, &fs_sd_desc, &fs_desc, + fullscreen_output.Get(), m_swap_chain.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + Log_WarningPrint("Failed to create fullscreen swap chain, trying windowed."); + m_is_exclusive_fullscreen = false; + m_using_allow_tearing = m_allow_tearing_supported; + + // The windowed description was filled in while tearing was disabled for exclusive fullscreen, so add the + // flag now; otherwise m_using_allow_tearing would not match how the swap chain is actually created. + if (m_using_allow_tearing) + swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + } + + if (!m_is_exclusive_fullscreen) + { + Log_VerbosePrintf("Creating a %dx%d windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height); + hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.Get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, + m_swap_chain.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + // Bail out here: hr is overwritten below, and CreateSwapChainRTV() dereferences m_swap_chain. + Log_ErrorPrintf("CreateSwapChainForHwnd() failed: 0x%08X", hr); + return false; + } + } + + hr = m_dxgi_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES); + if (FAILED(hr)) + Log_WarningPrint("MakeWindowAssociation() to disable ALT+ENTER failed"); + + if (!CreateSwapChainRTV()) + { + DestroySwapChain(); + return false; + } + + // Render a frame as soon as possible to clear out whatever was previously being displayed. 
+ RenderBlankFrame(); + return true; +} + +bool D3D12Device::CreateSwapChainRTV() +{ + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc); + if (FAILED(hr)) + return false; + + const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {swap_chain_desc.BufferDesc.Format, D3D12_RTV_DIMENSION_TEXTURE2D}; + + for (u32 i = 0; i < swap_chain_desc.BufferCount; i++) + { + ComPtr backbuffer; + hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("GetBuffer for RTV failed: 0x%08X", hr); + DestroySwapChainRTVs(); + return false; + } + + D3D12::SetObjectNameFormatted(backbuffer.Get(), "Swap Chain Buffer #%u", i); + + D3D12DescriptorHandle rtv; + if (!m_rtv_heap_manager.Allocate(&rtv)) + { + Log_ErrorPrintf("Failed to allocate RTV handle"); + DestroySwapChainRTVs(); + return false; + } + + m_device->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, rtv); + m_swap_chain_buffers.emplace_back(std::move(backbuffer), rtv); + } + + m_window_info.surface_width = swap_chain_desc.BufferDesc.Width; + m_window_info.surface_height = swap_chain_desc.BufferDesc.Height; + m_window_info.surface_format = s_swap_chain_format; + Log_VerbosePrintf("Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height); + + if (m_window_info.type == WindowInfo::Type::Win32) + { + BOOL fullscreen = FALSE; + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && + SUCCEEDED(m_swap_chain->GetDesc(&desc))) + { + m_window_info.surface_refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / + static_cast(desc.BufferDesc.RefreshRate.Denominator); + } + else + { + m_window_info.surface_refresh_rate = 0.0f; + } + } + + m_current_swap_chain_buffer = 0; + return true; +} + +void D3D12Device::DestroySwapChainRTVs() +{ + // Runtime gets cranky if we don't submit the current buffer... 
+ if (InRenderPass()) + EndRenderPass(); + SubmitCommandList(true); + + for (auto it = m_swap_chain_buffers.rbegin(); it != m_swap_chain_buffers.rend(); ++it) + { + m_rtv_heap_manager.Free(it->second.index); + it->first.Reset(); + } + m_swap_chain_buffers.clear(); + m_current_swap_chain_buffer = 0; +} + +void D3D12Device::DestroySwapChain() +{ + if (!m_swap_chain) + return; + + DestroySwapChainRTVs(); + + // switch out of fullscreen before destroying + BOOL is_fullscreen; + if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen) + m_swap_chain->SetFullscreenState(FALSE, nullptr); + + m_swap_chain.Reset(); + m_is_exclusive_fullscreen = false; +} + +void D3D12Device::RenderBlankFrame() +{ + if (InRenderPass()) + EndRenderPass(); + + auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast(m_swap_chain_buffers.size())); + D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_RENDER_TARGET); + cmdlist->ClearRenderTargetView(swap_chain_buf.second, s_present_clear_color.Color, 0, nullptr); + D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_PRESENT); + SubmitCommandList(false); + m_swap_chain->Present(0, m_using_allow_tearing ? 
DXGI_PRESENT_ALLOW_TEARING : 0); +} + +bool D3D12Device::UpdateWindow() +{ + WaitForGPUIdle(); + DestroySwapChain(); + + if (!AcquireWindow(false)) + return false; + + if (m_window_info.IsSurfaceless()) + return true; + + if (!CreateSwapChain()) + { + Log_ErrorPrintf("Failed to create swap chain on updated window"); + return false; + } + + RenderBlankFrame(); + return true; +} + +void D3D12Device::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) +{ + if (!m_swap_chain) + return; + + m_window_info.surface_scale = new_window_scale; + + if (m_window_info.surface_width == static_cast(new_window_width) && + m_window_info.surface_height == static_cast(new_window_height)) + { + return; + } + + DestroySwapChainRTVs(); + + HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, + m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); + if (FAILED(hr)) + Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); + + if (!CreateSwapChainRTV()) + Panic("Failed to recreate swap chain RTV after resize"); +} + +void D3D12Device::DestroySurface() +{ + DestroySwapChainRTVs(); + DestroySwapChain(); +} + +bool D3D12Device::SupportsTextureFormat(GPUTexture::Format format) const +{ + constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; + + const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format; + if (dfmt == DXGI_FORMAT_UNKNOWN) + return false; + + D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {dfmt}; + return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))) && + (support.Support1 & required) == required; +} + +std::string D3D12Device::GetDriverInfo() const +{ + std::string ret = fmt::format("{} ({})\n", D3DCommon::GetFeatureLevelString(m_feature_level), + D3DCommon::GetFeatureLevelShaderModelString(m_feature_level)); + + DXGI_ADAPTER_DESC desc; + if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc))) + { + ret += 
StringUtil::StdStringFromFormat("VID: 0x%04X PID: 0x%04X\n", desc.VendorId, desc.DeviceId); + ret += StringUtil::WideStringToUTF8String(desc.Description); + ret += "\n"; + + const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid)); + if (!driver_version.empty()) + { + ret += "Driver Version: "; + ret += driver_version; + } + } + + return ret; +} + +void D3D12Device::SetVSync(bool enabled) +{ + m_vsync_enabled = enabled; +} + +bool D3D12Device::BeginPresent(bool frame_skip) +{ + if (InRenderPass()) + EndRenderPass(); + + if (frame_skip) + return false; + + // If we're running surfaceless, kick the command buffer so we don't run out of descriptors. + if (!m_swap_chain) + { + SubmitCommandList(false); + return false; + } + + // TODO: Check if the device was lost. + + // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode. + // This might get called repeatedly if it takes a while to switch back, that's the host's problem. 
+ BOOL is_fullscreen; + if (m_is_exclusive_fullscreen && + (FAILED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) || !is_fullscreen)) + { + Host::RunOnCPUThread([]() { Host::SetFullscreen(false); }); + return false; + } + + BeginSwapChainRenderPass(); + return true; +} + +void D3D12Device::EndPresent() +{ + DebugAssert(InRenderPass() && !m_current_framebuffer); + EndRenderPass(); + + const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; + m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast(m_swap_chain_buffers.size())); + + ID3D12GraphicsCommandList* cmdlist = GetCommandList(); + D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_PRESENT); + + SubmitCommandList(false); + + if (!m_vsync_enabled && m_using_allow_tearing) + m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); + else + m_swap_chain->Present(static_cast(m_vsync_enabled), 0); +} + +#ifdef _DEBUG +static UINT64 Palette(float phase, const std::array& a, const std::array& b, + const std::array& c, const std::array& d) +{ + std::array result; + result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0])); + result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1])); + result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2])); + + return PIX_COLOR(static_cast(std::clamp(result[0] * 255.0f, 0.0f, 255.0f)), + static_cast(std::clamp(result[1] * 255.0f, 0.0f, 255.0f)), + static_cast(std::clamp(result[2] * 255.0f, 0.0f, 255.0f))); +} +#endif + +void D3D12Device::PushDebugGroup(const char* fmt, ...) 
+{ +#ifdef _DEBUG + if (!m_debug_device) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + const UINT64 color = Palette(static_cast(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f}, + {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f}); + PIXBeginEvent(GetCommandList(), color, "%s", buf.c_str()); +#endif +} + +void D3D12Device::PopDebugGroup() +{ +#ifdef _DEBUG + if (!m_debug_device) + return; + + s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u); + PIXEndEvent(GetCommandList()); +#endif +} + +void D3D12Device::InsertDebugMessage(const char* fmt, ...) +{ +#ifdef _DEBUG + if (!m_debug_device) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + if (buf.empty()) + return; + + PIXSetMarker(GetCommandList(), PIX_COLOR(0, 0, 0), "%s", buf.c_str()); +#endif +} + +void D3D12Device::SetFeatures() +{ + m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; + m_max_multisamples = 1; + for (u32 multisamples = 2; multisamples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++) + { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS fd = {DXGI_FORMAT_R8G8B8A8_UNORM, static_cast(multisamples)}; + + if (SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &fd, sizeof(fd))) && + fd.NumQualityLevels > 0) + { + m_max_multisamples = multisamples; + } + } + + m_features.dual_source_blend = true; + m_features.noperspective_interpolation = true; + m_features.per_sample_shading = true; + m_features.supports_texture_buffers = true; + m_features.texture_buffers_emulated_with_ssbo = false; + m_features.partial_msaa_resolve = true; + m_features.gpu_timing = true; + m_features.shader_cache = true; + m_features.pipeline_cache = true; + + BOOL allow_tearing_supported = false; + HRESULT hr = 
m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, + sizeof(allow_tearing_supported)); + m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE); +} + +void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) +{ + D3D12Texture* const S = static_cast(src); + D3D12Texture* const D = static_cast(dst); + + if (S->GetState() == GPUTexture::State::Cleared) + { + // source is cleared. if destination is a render target, we can carry the clear forward + if (D->IsRenderTargetOrDepthStencil()) + { + if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight()) + { + // pass it forward if we're clearing the whole thing + if (S->IsDepthStencil()) + D->SetClearDepth(S->GetClearDepth()); + else + D->SetClearColor(S->GetClearColor()); + + return; + } + + if (D->GetState() == GPUTexture::State::Cleared) + { + // destination is cleared, if it's the same colour and rect, we can just avoid this entirely + if (D->IsDepthStencil()) + { + if (D->GetClearDepth() == S->GetClearDepth()) + return; + } + else + { + if (D->GetClearColor() == S->GetClearColor()) + return; + } + } + } + + // commit the clear to the source first, then do normal copy + S->CommitClear(); + } + + // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first + // (the area outside of where we're copying to) + if (D->GetState() == GPUTexture::State::Cleared && + (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight())) + { + D->CommitClear(); + } + + // *now* we can do a normal image copy. 
+ if (InRenderPass()) + EndRenderPass(); + + S->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); + S->SetUseFenceValue(GetCurrentFenceValue()); + + D->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST); + D->SetUseFenceValue(GetCurrentFenceValue()); + + D3D12_TEXTURE_COPY_LOCATION srcloc; + srcloc.pResource = S->GetResource(); + srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcloc.SubresourceIndex = S->CalculateSubresource(src_layer, src_level); + + D3D12_TEXTURE_COPY_LOCATION dstloc; + dstloc.pResource = D->GetResource(); + dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstloc.SubresourceIndex = D->CalculateSubresource(dst_layer, dst_level); + + const D3D12_BOX srcbox{static_cast(src_x), static_cast(src_y), 0u, + static_cast(src_x + width), static_cast(src_y + height), 1u}; + GetCommandList()->CopyTextureRegion(&dstloc, dst_x, dst_y, 0, &srcloc, &srcbox); + + D->SetState(GPUTexture::State::Dirty); +} + +void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) +{ + DebugAssert((src_x + width) <= src->GetWidth()); + DebugAssert((src_y + height) <= src->GetHeight()); + DebugAssert(src->IsMultisampled()); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); + DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + + if (InRenderPass()) + EndRenderPass(); + + D3D12Texture* D = static_cast(dst); + D3D12Texture* S = static_cast(src); + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + const u32 DSR = D->CalculateSubresource(dst_layer, dst_level); + + S->CommitClear(cmdlist); + D->CommitClear(cmdlist); + + S->TransitionSubresourceToState(cmdlist, 0, S->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + D->TransitionSubresourceToState(cmdlist, DSR, 
D->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_DEST); + + if (src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 && + width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level)) + { + cmdlist->ResolveSubresource(D->GetResource(), DSR, S->GetResource(), 0, S->GetDXGIFormat()); + } + else + { + D3D12_RECT src_rc{static_cast(src_x), static_cast(src_y), static_cast(src_x + width), + static_cast(src_y + height)}; + cmdlist->ResolveSubresourceRegion(D->GetResource(), D->CalculateSubresource(dst_level, dst_layer), dst_x, dst_y, + S->GetResource(), 0, &src_rc, D->GetDXGIFormat(), D3D12_RESOLVE_MODE_AVERAGE); + } + + S->TransitionSubresourceToState(cmdlist, 0, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, S->GetResourceState()); + D->TransitionSubresourceToState(cmdlist, DSR, D3D12_RESOURCE_STATE_RESOLVE_DEST, D->GetResourceState()); +} + +void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c) +{ + GPUDevice::ClearRenderTarget(t, c); + if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t) + EndRenderPass(); +} + +void D3D12Device::ClearDepth(GPUTexture* t, float d) +{ + GPUDevice::ClearDepth(t, d); + if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t) + EndRenderPass(); +} + +void D3D12Device::InvalidateRenderTarget(GPUTexture* t) +{ + GPUDevice::InvalidateRenderTarget(t); + if (InRenderPass() && m_current_framebuffer && + (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) + { + EndRenderPass(); + } +} + +bool D3D12Device::CreateBuffers() +{ + if (!m_vertex_buffer.Create(VERTEX_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate vertex buffer"); + return false; + } + + if (!m_index_buffer.Create(INDEX_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate index buffer"); + return false; + } + + if (!m_uniform_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate uniform 
buffer"); + return false; + } + + if (!m_texture_upload_buffer.Create(TEXTURE_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate texture upload buffer"); + return false; + } + + return true; +} + +void D3D12Device::DestroyBuffers() +{ + m_texture_upload_buffer.Destroy(false); + m_uniform_buffer.Destroy(false); + m_index_buffer.Destroy(false); + m_vertex_buffer.Destroy(false); +} + +void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) +{ + const u32 req_size = vertex_size * vertex_count; + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + { + SubmitCommandListAndRestartRenderPass("out of vertex space"); + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + Panic("Failed to allocate vertex space"); + } + + *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); + *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; + *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; +} + +void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) +{ + m_vertex_buffer.CommitMemory(vertex_size * vertex_count); +} + +void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) +{ + const u32 req_size = sizeof(DrawIndex) * index_count; + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + { + SubmitCommandListAndRestartRenderPass("out of index space"); + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + Panic("Failed to allocate index space"); + } + + *map_ptr = reinterpret_cast(m_index_buffer.GetCurrentHostPointer()); + *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); + *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); +} + +void D3D12Device::UnmapIndexBuffer(u32 used_index_count) +{ + m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); +} + +void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) +{ + static 
constexpr std::array(GPUPipeline::Layout::MaxCount)> push_parameter = { + 0, // SingleTextureAndUBO + 2, // SingleTextureAndPushConstants + 1, // SingleTextureBufferAndPushConstants + 0, // MultiTextureAndUBO + 2, // MultiTextureAndPushConstants + }; + + DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); + if (m_dirty_flags & DIRTY_FLAG_PIPELINE_LAYOUT) + { + m_dirty_flags &= ~DIRTY_FLAG_PIPELINE_LAYOUT; + UpdateRootSignature(); + } + + GetCommandList()->SetGraphicsRoot32BitConstants(push_parameter[static_cast(m_current_pipeline_layout)], + data_size / 4u, data, 0); +} + +void* D3D12Device::MapUniformBuffer(u32 size) +{ + const u32 used_space = Common::AlignUpPow2(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + { + SubmitCommandListAndRestartRenderPass("out of uniform space"); + if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) + Panic("Failed to allocate uniform space."); + } + + return m_uniform_buffer.GetCurrentHostPointer(); +} + +void D3D12Device::UnmapUniformBuffer(u32 size) +{ + m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); + m_uniform_buffer.CommitMemory(size); + m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER; +} + +bool D3D12Device::CreateRootSignatures() +{ + D3D12::RootSignatureBuilder rsb; + + { + auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; + + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create())) + return false; + D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout"); + } + + { + auto& rs = 
m_root_signatures[static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; + + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create())) + return false; + D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout"); + } + + { + auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; + + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create())) + return false; + D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout"); + } + + { + auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::MultiTextureAndUBO)]; + + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create())) + return false; + D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout"); + } + + { + auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)]; + + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); 
+ if (!(rs = rsb.Create())) + return false; + D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout"); + } + + return true; +} + +void D3D12Device::DestroyRootSignatures() +{ + for (auto it = m_root_signatures.rbegin(); it != m_root_signatures.rend(); ++it) + it->Reset(); +} + +void D3D12Device::SetFramebuffer(GPUFramebuffer* fb) +{ + if (m_current_framebuffer == fb) + return; + + if (InRenderPass()) + EndRenderPass(); + + m_current_framebuffer = static_cast(fb); +} + +void D3D12Device::BeginRenderPass() +{ + DebugAssert(!InRenderPass()); + + D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc; + D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc; + const D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr; + const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr; + + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + if (LIKELY(m_current_framebuffer)) + { + D3D12Texture* rt = static_cast(m_current_framebuffer->GetRT()); + if (rt) + { + rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); + rt->SetUseFenceValue(GetCurrentFenceValue()); + rt_desc_p = &rt_desc; + rt_desc.cpuDescriptor = rt->GetWriteDescriptor(); + rt_desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; + + switch (rt->GetState()) + { + case GPUTexture::State::Cleared: + { + rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR; + std::memcpy(rt_desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(), + sizeof(rt_desc.BeginningAccess.Clear.ClearValue.Color)); + rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD; + rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE; + } + break; + + default: + UnreachableCode(); + break; + } + } + + D3D12Texture* ds = 
static_cast(m_current_framebuffer->GetDS()); + if (ds) + { + ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE); + ds->SetUseFenceValue(GetCurrentFenceValue()); + ds_desc_p = &ds_desc; + ds_desc.cpuDescriptor = ds->GetWriteDescriptor(); + ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; + ds_desc.StencilBeginningAccess = {}; + ds_desc.StencilEndingAccess = {}; + + switch (ds->GetState()) + { + case GPUTexture::State::Cleared: + { + ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR; + ds_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = ds->GetClearDepth(); + ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD; + ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE; + } + break; + + default: + UnreachableCode(); + break; + } + } + } + else + { + // Re-rendering to swap chain. + const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; + rt_desc = {swap_chain_buf.second, + {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE}, + {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE}}; + rt_desc_p = &rt_desc; + } + + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + DebugAssert(rt_desc_p || ds_desc_p); + cmdlist->BeginRenderPass(rt_desc_p ? 1 : 0, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE); + + // TODO: Stats + m_in_render_pass = true; + + // If this is a new command buffer, bind the pipeline and such. 
+ if (m_dirty_flags & DIRTY_FLAG_INITIAL) + SetInitialPipelineState(); +} + +void D3D12Device::BeginSwapChainRenderPass() +{ + DebugAssert(!InRenderPass()); + + ID3D12GraphicsCommandList4* const cmdlist = GetCommandList(); + const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; + + D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON, + D3D12_RESOURCE_STATE_RENDER_TARGET); + + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + const D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = { + swap_chain_buf.second, + {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, {s_present_clear_color}}, + {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE}}; + cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE); + + m_current_framebuffer = nullptr; + m_in_render_pass = true; + + // Clear pipeline, it's likely incompatible. 
+ m_current_pipeline = nullptr; +} + +bool D3D12Device::InRenderPass() +{ + return m_in_render_pass; +} + +void D3D12Device::EndRenderPass() +{ + DebugAssert(m_in_render_pass); + + // TODO: stats + m_in_render_pass = false; + + GetCommandList()->EndRenderPass(); +} + +void D3D12Device::UnbindFramebuffer(D3D12Framebuffer* fb) +{ + if (m_current_framebuffer != fb) + return; + + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void D3D12Device::UnbindFramebuffer(D3D12Texture* tex) +{ + if (!m_current_framebuffer) + return; + + if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) + return; + + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void D3D12Device::SetPipeline(GPUPipeline* pipeline) +{ + // First draw? Bind everything. + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + { + m_current_pipeline = static_cast(pipeline); + if (!m_current_pipeline) + return; + + SetInitialPipelineState(); + return; + } + else if (m_current_pipeline == pipeline) + { + return; + } + + m_current_pipeline = static_cast(pipeline); + + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + cmdlist->SetPipelineState(m_current_pipeline->GetPipeline()); + + if (D3D12_PRIMITIVE_TOPOLOGY topology = m_current_pipeline->GetTopology(); topology != m_current_topology) + { + m_current_topology = topology; + cmdlist->IASetPrimitiveTopology(topology); + } + + if (u32 vertex_stride = m_current_pipeline->GetVertexStride(); + vertex_stride > 0 && m_current_vertex_stride != vertex_stride) + { + m_current_vertex_stride = vertex_stride; + SetVertexBuffer(cmdlist); + } + + // TODO: we don't need to change the blend constant if blending isn't on. 
+ if (u32 blend_constants = m_current_pipeline->GetBlendConstants(); m_current_blend_constant != blend_constants) + { + m_current_blend_constant = blend_constants; + cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data()); + } + + if (GPUPipeline::Layout layout = m_current_pipeline->GetLayout(); m_current_pipeline_layout != layout) + { + m_current_pipeline_layout = layout; + m_dirty_flags |= + DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS; + } +} + +void D3D12Device::UnbindPipeline(D3D12Pipeline* pl) +{ + if (m_current_pipeline != pl) + return; + + m_current_pipeline = nullptr; +} + +void D3D12Device::InvalidateCachedState() +{ + m_dirty_flags = ALL_DIRTY_STATE; + m_in_render_pass = false; + m_current_framebuffer = nullptr; + m_current_pipeline = nullptr; + m_current_vertex_stride = 0; + m_current_blend_constant = 0; + m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; +} + +void D3D12Device::SetInitialPipelineState() +{ + DebugAssert(m_current_pipeline); + m_dirty_flags &= ~DIRTY_FLAG_INITIAL; + + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + m_current_vertex_stride = m_current_pipeline->GetVertexStride(); + SetVertexBuffer(cmdlist); + const D3D12_INDEX_BUFFER_VIEW ib_view = {m_index_buffer.GetGPUPointer(), m_index_buffer.GetSize(), + DXGI_FORMAT_R16_UINT}; + cmdlist->IASetIndexBuffer(&ib_view); + + cmdlist->SetPipelineState(m_current_pipeline->GetPipeline()); + m_current_pipeline_layout = m_current_pipeline->GetLayout(); + + m_current_topology = m_current_pipeline->GetTopology(); + cmdlist->IASetPrimitiveTopology(m_current_topology); + + m_current_blend_constant = m_current_pipeline->GetBlendConstants(); + cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data()); + + SetViewport(cmdlist); + SetScissor(cmdlist); +} + +void D3D12Device::SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist) +{ + const D3D12_VERTEX_BUFFER_VIEW vb_view = 
{m_vertex_buffer.GetGPUPointer(), m_vertex_buffer.GetSize(), + m_current_vertex_stride}; + cmdlist->IASetVertexBuffers(0, 1, &vb_view); +} + +void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist) +{ + const D3D12_VIEWPORT vp = {static_cast(m_current_viewport.left), + static_cast(m_current_viewport.top), + static_cast(m_current_viewport.GetWidth()), + static_cast(m_current_viewport.GetHeight()), + 0.0f, + 1.0f}; + cmdlist->RSSetViewports(1, &vp); +} + +void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist) +{ + const D3D12_RECT rc = {static_cast(m_current_scissor.left), static_cast(m_current_scissor.top), + static_cast(m_current_scissor.right), static_cast(m_current_scissor.bottom)}; + cmdlist->RSSetScissorRects(1, &rc); +} + +void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) +{ + D3D12DescriptorHandle null_handle; + + D3D12Texture* T = static_cast(texture); + if (m_current_textures[slot] != T) + { + m_current_textures[slot] = T; + m_dirty_flags |= DIRTY_FLAG_TEXTURES; + + if (T) + { + T->SetUseFenceValue(GetCurrentFenceValue()); + if (T->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + { + if (InRenderPass()) + EndRenderPass(); + T->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + + const D3D12DescriptorHandle& handle = + sampler ? 
static_cast(sampler)->GetDescriptor() : m_point_sampler; + if (m_current_samplers[slot] != handle) + { + m_current_samplers[slot] = handle; + m_dirty_flags |= DIRTY_FLAG_SAMPLERS; + } +} + +void D3D12Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) +{ + DebugAssert(slot == 0); + if (m_current_texture_buffer == buffer) + return; + + m_current_texture_buffer = static_cast(buffer); + if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + m_dirty_flags |= DIRTY_FLAG_TEXTURES; +} + +void D3D12Device::UnbindTexture(D3D12Texture* tex) +{ + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == tex) + { + m_current_textures[i] = nullptr; + m_dirty_flags |= DIRTY_FLAG_TEXTURES; + } + } +} + +void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf) +{ + if (m_current_texture_buffer != buf) + return; + + m_current_texture_buffer = nullptr; + + if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + m_dirty_flags |= DIRTY_FLAG_TEXTURES; +} + +void D3D12Device::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); + if (m_current_viewport == rc) + return; + + m_current_viewport = rc; + + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + return; + + SetViewport(GetCommandList()); +} + +void D3D12Device::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); + if (m_current_scissor == rc) + return; + + m_current_scissor = rc; + + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + return; + + SetScissor(GetCommandList()); +} + +void D3D12Device::PreDrawCheck() +{ + // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants. 
+ + DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); + const u32 dirty = std::exchange(m_dirty_flags, 0); + if (dirty != 0) + { + if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) + { + UpdateRootSignature(); + if (!UpdateRootParameters(dirty)) + { + SubmitCommandListAndRestartRenderPass("out of descriptors"); + PreDrawCheck(); + return; + } + } + else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS)) + { + if (!UpdateRootParameters(dirty)) + { + SubmitCommandListAndRestartRenderPass("out of descriptors"); + PreDrawCheck(); + return; + } + } + } + + if (!InRenderPass()) + BeginRenderPass(); +} + +void D3D12Device::UpdateRootSignature() +{ + GetCommandList()->SetGraphicsRootSignature(m_root_signatures[static_cast(m_current_pipeline_layout)].Get()); +} + +template +bool D3D12Device::UpdateParametersForLayout(u32 dirty) +{ + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) + { + if (dirty & DIRTY_FLAG_CONSTANT_BUFFER) + cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position); + } + + constexpr u32 num_textures = GetActiveTexturesForLayout(layout); + if (dirty & DIRTY_FLAG_TEXTURES && num_textures > 0) + { + D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator; + D3D12DescriptorHandle gpu_handle; + if (!allocator.Allocate(num_textures, &gpu_handle)) + return false; + + if constexpr (num_textures == 1) + { + m_device->CopyDescriptorsSimple( + 1, gpu_handle, m_current_textures[0] ? m_current_textures[0]->GetSRVDescriptor() : m_null_srv_descriptor, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + else + { + D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_TEXTURE_SAMPLERS]; + UINT src_sizes[MAX_TEXTURE_SAMPLERS]; + for (u32 i = 0; i < num_textures; i++) + { + src_handles[i] = m_current_textures[i] ? 
m_current_textures[i]->GetSRVDescriptor() : m_null_srv_descriptor; + src_sizes[i] = 1; + } + m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &num_textures, num_textures, src_handles, src_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); + } + + if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0) + { + auto& allocator = m_command_lists[m_current_command_list].sampler_allocator; + D3D12DescriptorHandle gpu_handle; + if constexpr (num_textures == 1) + { + if (!allocator.LookupSingle(m_device.Get(), &gpu_handle, m_current_samplers[0])) + return false; + } + else + { + if (!allocator.LookupGroup(m_device.Get(), &gpu_handle, m_current_samplers.data())) + return false; + } + + cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle); + } + + if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + { + D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator; + D3D12DescriptorHandle gpu_handle; + if (!allocator.Allocate(1, &gpu_handle)) + return false; + + m_device->CopyDescriptorsSimple( + 1, gpu_handle, m_current_texture_buffer ? 
m_current_texture_buffer->GetDescriptor() : m_null_srv_descriptor, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); + } + + return true; +} + +bool D3D12Device::UpdateRootParameters(u32 dirty) +{ + switch (m_current_pipeline_layout) + { + case GPUPipeline::Layout::SingleTextureAndUBO: + return UpdateParametersForLayout(dirty); + + case GPUPipeline::Layout::SingleTextureAndPushConstants: + return UpdateParametersForLayout(dirty); + + case GPUPipeline::Layout::SingleTextureBufferAndPushConstants: + return UpdateParametersForLayout(dirty); + + case GPUPipeline::Layout::MultiTextureAndUBO: + return UpdateParametersForLayout(dirty); + + case GPUPipeline::Layout::MultiTextureAndPushConstants: + return UpdateParametersForLayout(dirty); + + default: + UnreachableCode(); + return false; + } +} + +void D3D12Device::Draw(u32 vertex_count, u32 base_vertex) +{ + PreDrawCheck(); + GetCommandList()->DrawInstanced(vertex_count, 1, base_vertex, 0); +} + +void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) +{ + PreDrawCheck(); + GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0); +} diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h new file mode 100644 index 000000000..2b5651b3e --- /dev/null +++ b/src/util/d3d12_device.h @@ -0,0 +1,345 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "d3d12_descriptor_heap_manager.h" +#include "d3d12_stream_buffer.h" +#include "gpu_device.h" +#include "gpu_texture.h" + +#include "common/windows_headers.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class D3D12Framebuffer; +class D3D12Pipeline; +class D3D12SwapChain; +class D3D12Texture; +class D3D12TextureBuffer; + +namespace D3D12MA { +class Allocator; +} + +class D3D12Device final 
: public GPUDevice +{ +public: + friend D3D12Texture; + + template + using ComPtr = Microsoft::WRL::ComPtr; + + enum : u32 + { + NUM_COMMAND_LISTS = 3, + + /// Start/End timestamp queries. + NUM_TIMESTAMP_QUERIES_PER_CMDLIST = 2, + }; + +public: + D3D12Device(); + ~D3D12Device() override; + + RenderAPI GetRenderAPI() const override; + + bool HasSurface() const override; + + bool UpdateWindow() override; + void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; + + static AdapterAndModeList StaticGetAdapterAndModeList(); + AdapterAndModeList GetAdapterAndModeList() override; + void DestroySurface() override; + + std::string GetDriverInfo() const override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + + bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; + void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 width, u32 height) override; + void ClearRenderTarget(GPUTexture* t, u32 c) override; + void ClearDepth(GPUTexture* t, float d) override; + void InvalidateRenderTarget(GPUTexture* t) override; + + std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; + + std::unique_ptr 
CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, DynamicHeapArray* out_binary) override; + std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size) override; + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + + void SetVSync(bool enabled) override; + + bool BeginPresent(bool skip_present) override; + void EndPresent() override; + + // Global state accessors + ALWAYS_INLINE static D3D12Device& GetInstance() { return *static_cast(g_gpu_device.get()); } + ALWAYS_INLINE IDXGIAdapter1* GetAdapter() const { return m_adapter.Get(); } + ALWAYS_INLINE ID3D12Device1* GetDevice() const { return 
m_device.Get(); } + ALWAYS_INLINE ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.Get(); } + ALWAYS_INLINE D3D12MA::Allocator* GetAllocator() const { return m_allocator.Get(); } + + void WaitForGPUIdle(); + + // Descriptor manager access. + D3D12DescriptorHeapManager& GetDescriptorHeapManager() { return m_descriptor_heap_manager; } + D3D12DescriptorHeapManager& GetRTVHeapManager() { return m_rtv_heap_manager; } + D3D12DescriptorHeapManager& GetDSVHeapManager() { return m_dsv_heap_manager; } + D3D12DescriptorHeapManager& GetSamplerHeapManager() { return m_sampler_heap_manager; } + const D3D12DescriptorHandle& GetNullSRVDescriptor() const { return m_null_srv_descriptor; } + + // These command buffers are allocated per-frame. They are valid until the command buffer + // is submitted, after that you should call these functions again. + ALWAYS_INLINE ID3D12GraphicsCommandList4* GetCommandList() const + { + return m_command_lists[m_current_command_list].command_lists[1].Get(); + } + ALWAYS_INLINE D3D12StreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; } + ID3D12GraphicsCommandList4* GetInitCommandList(); + + // Root signature access. + ComPtr SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc); + ComPtr CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc); + + /// Fence value for current command list. + u64 GetCurrentFenceValue() const { return m_current_fence_value; } + + /// Last "completed" fence. + u64 GetCompletedFenceValue() const { return m_completed_fence_value; } + + // Schedule a d3d12 resource for destruction later on. This will occur when the command buffer + // is next re-used, and the GPU has finished working with the specified resource. + void DeferObjectDestruction(ComPtr resource); + void DeferResourceDestruction(ComPtr allocation, ComPtr resource); + void DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor); + + // Wait for a fence to be completed. 
+ // Also invokes callbacks for completion. + void WaitForFence(u64 fence_counter); + + /// Ends any render pass, executes the command buffer, and invalidates cached state. + void SubmitCommandList(bool wait_for_completion); + void SubmitCommandList(bool wait_for_completion, const char* reason, ...); + void SubmitCommandListAndRestartRenderPass(const char* reason); + + void UnbindFramebuffer(D3D12Framebuffer* fb); + void UnbindFramebuffer(D3D12Texture* tex); + void UnbindPipeline(D3D12Pipeline* pl); + void UnbindTexture(D3D12Texture* tex); + void UnbindTextureBuffer(D3D12TextureBuffer* buf); + +protected: + bool CreateDevice(const std::string_view& adapter, bool threaded_presentation) override; + void DestroyDevice() override; + + bool ReadPipelineCache(const std::string& filename) override; + bool GetPipelineCacheData(DynamicHeapArray* data) override; + +private: + enum DIRTY_FLAG : u32 + { + DIRTY_FLAG_INITIAL = (1 << 0), + DIRTY_FLAG_PIPELINE_LAYOUT = (1 << 1), + DIRTY_FLAG_CONSTANT_BUFFER = (1 << 2), + DIRTY_FLAG_TEXTURES = (1 << 3), + DIRTY_FLAG_SAMPLERS = (1 << 4), + + ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | + DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS, + }; + + struct CommandList + { + // [0] - Init (upload) command buffer, [1] - draw command buffer + std::array, 2> command_allocators; + std::array, 2> command_lists; + D3D12DescriptorAllocator descriptor_allocator; + D3D12GroupedSamplerAllocator sampler_allocator; + u64 fence_counter = 0; + bool init_list_used = false; + bool needs_fence_wait = false; + bool has_timestamp_query = false; + }; + + using SamplerMap = std::unordered_map; + + static void GetAdapterAndModeList(AdapterAndModeList* ret, IDXGIFactory5* factory); + + void SetFeatures(); + + bool CreateSwapChain(); + bool CreateSwapChainRTV(); + void DestroySwapChainRTVs(); + void DestroySwapChain(); + + bool CreateCommandLists(); + void DestroyCommandLists(); + bool CreateRootSignatures(); + void 
DestroyRootSignatures(); + bool CreateBuffers(); + void DestroyBuffers(); + bool CreateDescriptorHeaps(); + void DestroyDescriptorHeaps(); + bool CreateTimestampQuery(); + void DestroyTimestampQuery(); + D3D12DescriptorHandle GetSampler(const GPUSampler::Config& config); + void DestroySamplers(); + void DestroyDeferredObjects(u64 fence_value); + + void RenderBlankFrame(); + void MoveToNextCommandList(); + + bool CreateSRVDescriptor(ID3D12Resource* resource, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format, + D3D12DescriptorHandle* dh); + bool CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); + bool CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); + bool CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); + + bool CheckDownloadBufferSize(u32 required_size); + void DestroyDownloadBuffer(); + + /// Set dirty flags on everything to force re-bind at next draw time. + void InvalidateCachedState(); + void SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist); + void SetViewport(ID3D12GraphicsCommandList4* cmdlist); + void SetScissor(ID3D12GraphicsCommandList4* cmdlist); + + /// Applies any changed state. + ID3D12RootSignature* GetCurrentRootSignature() const; + void SetInitialPipelineState(); + void PreDrawCheck(); + + void UpdateRootSignature(); + template + bool UpdateParametersForLayout(u32 dirty); + bool UpdateRootParameters(u32 dirty); + + // Ends a render pass if we're currently in one. + // When Bind() is next called, the pass will be restarted. 
+ void BeginRenderPass(); + void BeginSwapChainRenderPass(); + void EndRenderPass(); + bool InRenderPass(); + + ComPtr m_adapter; + ComPtr m_device; + ComPtr m_command_queue; + ComPtr m_allocator; + + ComPtr m_fence; + HANDLE m_fence_event = {}; + u64 m_current_fence_value = 0; + u64 m_completed_fence_value = 0; + + std::array m_command_lists; + u32 m_current_command_list = NUM_COMMAND_LISTS - 1; + D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; + + ComPtr m_dxgi_factory; + ComPtr m_swap_chain; + std::vector, D3D12DescriptorHandle>> m_swap_chain_buffers; + u32 m_current_swap_chain_buffer = 0; + bool m_allow_tearing_supported = false; + bool m_using_allow_tearing = false; + bool m_is_exclusive_fullscreen = false; + + D3D12DescriptorHeapManager m_descriptor_heap_manager; + D3D12DescriptorHeapManager m_rtv_heap_manager; + D3D12DescriptorHeapManager m_dsv_heap_manager; + D3D12DescriptorHeapManager m_sampler_heap_manager; + D3D12DescriptorHandle m_null_srv_descriptor; + D3D12DescriptorHandle m_point_sampler; + + ComPtr m_timestamp_query_heap; + ComPtr m_timestamp_query_buffer; + ComPtr m_timestamp_query_allocation; + double m_timestamp_frequency = 0.0; + float m_accumulated_gpu_time = 0.0f; + + std::deque>> m_cleanup_resources; + std::deque>> m_cleanup_descriptors; + + std::array, static_cast(GPUPipeline::Layout::MaxCount)> m_root_signatures = {}; + + D3D12StreamBuffer m_vertex_buffer; + D3D12StreamBuffer m_index_buffer; + D3D12StreamBuffer m_uniform_buffer; + D3D12StreamBuffer m_texture_upload_buffer; + + u32 m_uniform_buffer_position = 0; + bool m_in_render_pass = false; + + SamplerMap m_sampler_map; + ComPtr m_pipeline_library; + + ComPtr m_download_buffer_allocation; + ComPtr m_download_buffer; + u32 m_download_buffer_size = 0; + + // Which bindings/state has to be updated before the next draw. 
+ u32 m_dirty_flags = ALL_DIRTY_STATE; + + D3D12Framebuffer* m_current_framebuffer = nullptr; + + D3D12Pipeline* m_current_pipeline = nullptr; + D3D12_PRIMITIVE_TOPOLOGY m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + u32 m_current_vertex_stride = 0; + u32 m_current_blend_constant = 0; + GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + + std::array m_current_textures = {}; + std::array m_current_samplers = {}; + D3D12TextureBuffer* m_current_texture_buffer = nullptr; + Common::Rectangle m_current_viewport{0, 0, 1, 1}; + Common::Rectangle m_current_scissor{0, 0, 1, 1}; +}; diff --git a/src/util/d3d12_host_display.cpp b/src/util/d3d12_host_display.cpp deleted file mode 100644 index 428efcc48..000000000 --- a/src/util/d3d12_host_display.cpp +++ /dev/null @@ -1,1062 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "d3d12_host_display.h" -#include "common/assert.h" -#include "common/d3d11/shader_compiler.h" -#include "common/d3d12/context.h" -#include "common/d3d12/shader_cache.h" -#include "common/d3d12/util.h" -#include "common/log.h" -#include "common/string_util.h" -#include "core/settings.h" -#include "display_ps.hlsl.h" -#include "display_ps_alpha.hlsl.h" -#include "display_vs.hlsl.h" -#include "postprocessing_shadergen.h" -#include "imgui.h" -#include "imgui_impl_dx12.h" -#include -#include -Log_SetChannel(D3D12HostDisplay); - -static constexpr const std::array s_clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; - -D3D12HostDisplay::D3D12HostDisplay() = default; - -D3D12HostDisplay::~D3D12HostDisplay() -{ - if (!g_d3d12_context) - return; - - // DestroyRenderSurface() will exec the command list. 
- DestroySurface(); - DestroyResources(); - g_d3d12_context->Destroy(); -} - -RenderAPI D3D12HostDisplay::GetRenderAPI() const -{ - return RenderAPI::D3D12; -} - -void* D3D12HostDisplay::GetDevice() const -{ - return g_d3d12_context->GetDevice(); -} - -void* D3D12HostDisplay::GetContext() const -{ - return g_d3d12_context.get(); -} - -bool D3D12HostDisplay::HasDevice() const -{ - return static_cast(g_d3d12_context); -} - -bool D3D12HostDisplay::HasSurface() const -{ - return static_cast(m_swap_chain); -} - -std::unique_ptr D3D12HostDisplay::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, - u32 data_stride, bool dynamic /* = false */) -{ - const DXGI_FORMAT dformat = D3D12::Texture::GetDXGIFormat(format); - if (dformat == DXGI_FORMAT_UNKNOWN) - return {}; - - std::unique_ptr tex(std::make_unique()); - if (!tex->Create(width, height, layers, levels, samples, dformat, dformat, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_FLAG_NONE)) - { - return {}; - } - - if (data && !tex->LoadData(0, 0, width, height, data, data_stride)) - return {}; - - return tex; -} - -bool D3D12HostDisplay::BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) -{ - return static_cast(texture)->BeginStreamUpdate(0, 0, width, height, out_buffer, out_pitch); -} - -void D3D12HostDisplay::EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) -{ - static_cast(texture)->EndStreamUpdate(x, y, width, height); -} - -bool D3D12HostDisplay::UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, - u32 pitch) -{ - return HostDisplay::UpdateTexture(texture, x, y, width, height, data, pitch); -} - -bool D3D12HostDisplay::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - const D3D12::Texture* tex = static_cast(texture); - - if 
(!m_readback_staging_texture.EnsureSize(width, height, tex->GetDXGIFormat(), false)) - return false; - - const D3D12_RESOURCE_STATES old_state = tex->GetState(); - tex->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); - m_readback_staging_texture.CopyFromTexture(tex->GetResource(), 0, x, y, 0, 0, width, height); - tex->TransitionToState(old_state); - - return m_readback_staging_texture.ReadPixels(0, 0, width, height, out_data, out_data_stride); -} - -bool D3D12HostDisplay::SupportsTextureFormat(GPUTexture::Format format) const -{ - const DXGI_FORMAT dfmt = D3D12::Texture::GetDXGIFormat(format); - if (dfmt == DXGI_FORMAT_UNKNOWN) - return false; - - return g_d3d12_context->SupportsTextureFormat(dfmt); -} - -bool D3D12HostDisplay::GetHostRefreshRate(float* refresh_rate) -{ - if (m_swap_chain && IsFullscreen()) - { - DXGI_SWAP_CHAIN_DESC desc; - if (SUCCEEDED(m_swap_chain->GetDesc(&desc)) && desc.BufferDesc.RefreshRate.Numerator > 0 && - desc.BufferDesc.RefreshRate.Denominator > 0) - { - Log_InfoPrintf("using fs rr: %u %u", desc.BufferDesc.RefreshRate.Numerator, - desc.BufferDesc.RefreshRate.Denominator); - *refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / - static_cast(desc.BufferDesc.RefreshRate.Denominator); - return true; - } - } - - return HostDisplay::GetHostRefreshRate(refresh_rate); -} - -void D3D12HostDisplay::SetVSync(bool enabled) -{ - m_vsync_enabled = enabled; -} - -bool D3D12HostDisplay::CreateDevice(const WindowInfo& wi, bool vsync) -{ - ComPtr temp_dxgi_factory; - HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(temp_dxgi_factory.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create DXGI factory: 0x%08X", hr); - return false; - } - - u32 adapter_index; - if (!g_settings.gpu_adapter.empty()) - { - AdapterAndModeList adapter_info(GetAdapterAndModeList(temp_dxgi_factory.Get())); - for (adapter_index = 0; adapter_index < static_cast(adapter_info.adapter_names.size()); adapter_index++) - { - if (g_settings.gpu_adapter 
== adapter_info.adapter_names[adapter_index]) - break; - } - if (adapter_index == static_cast(adapter_info.adapter_names.size())) - { - Log_WarningPrintf("Could not find adapter '%s', using first (%s)", g_settings.gpu_adapter.c_str(), - adapter_info.adapter_names[0].c_str()); - adapter_index = 0; - } - } - else - { - Log_InfoPrintf("No adapter selected, using first."); - adapter_index = 0; - } - - if (!D3D12::Context::Create(temp_dxgi_factory.Get(), adapter_index, g_settings.gpu_use_debug_device)) - return false; - - if (FAILED(hr)) - { - Log_ErrorPrintf("Failed to create D3D device: 0x%08X", hr); - return false; - } - - m_dxgi_factory = std::move(temp_dxgi_factory); - - m_allow_tearing_supported = false; - ComPtr dxgi_factory5; - hr = m_dxgi_factory.As(&dxgi_factory5); - if (SUCCEEDED(hr)) - { - BOOL allow_tearing_supported = false; - hr = dxgi_factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, - sizeof(allow_tearing_supported)); - if (SUCCEEDED(hr)) - m_allow_tearing_supported = (allow_tearing_supported == TRUE); - } - - m_window_info = wi; - m_vsync_enabled = vsync; - - if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain(nullptr)) - { - m_window_info = {}; - return false; - } - - return true; -} - -bool D3D12HostDisplay::SetupDevice() -{ - if (!CreateResources()) - return false; - - return true; -} - -bool D3D12HostDisplay::MakeCurrent() -{ - return true; -} - -bool D3D12HostDisplay::DoneCurrent() -{ - return true; -} - -bool D3D12HostDisplay::CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode) -{ - HRESULT hr; - - if (m_window_info.type != WindowInfo::Type::Win32) - return false; - - const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle); - RECT client_rc{}; - GetClientRect(window_hwnd, &client_rc); - const u32 width = static_cast(client_rc.right - client_rc.left); - const u32 height = static_cast(client_rc.bottom - client_rc.top); - - DXGI_SWAP_CHAIN_DESC swap_chain_desc = {}; 
- swap_chain_desc.BufferDesc.Width = width; - swap_chain_desc.BufferDesc.Height = height; - swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - swap_chain_desc.SampleDesc.Count = 1; - swap_chain_desc.BufferCount = 2; - swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swap_chain_desc.OutputWindow = window_hwnd; - swap_chain_desc.Windowed = TRUE; - swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - - m_using_allow_tearing = (m_allow_tearing_supported && !fullscreen_mode); - if (m_using_allow_tearing) - swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; - - if (fullscreen_mode) - { - swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; - swap_chain_desc.Windowed = FALSE; - swap_chain_desc.BufferDesc = *fullscreen_mode; - } - - Log_InfoPrintf("Creating a %dx%d %s swap chain", swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, - swap_chain_desc.Windowed ? "windowed" : "full-screen"); - - hr = - m_dxgi_factory->CreateSwapChain(g_d3d12_context->GetCommandQueue(), &swap_chain_desc, m_swap_chain.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateSwapChain failed: 0x%08X", hr); - return false; - } - - hr = m_dxgi_factory->MakeWindowAssociation(swap_chain_desc.OutputWindow, DXGI_MWA_NO_WINDOW_CHANGES); - if (FAILED(hr)) - Log_WarningPrintf("MakeWindowAssociation() to disable ALT+ENTER failed"); - - return CreateSwapChainRTV(); -} - -bool D3D12HostDisplay::CreateSwapChainRTV() -{ - DXGI_SWAP_CHAIN_DESC swap_chain_desc; - HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc); - if (FAILED(hr)) - return false; - - for (u32 i = 0; i < swap_chain_desc.BufferCount; i++) - { - ComPtr backbuffer; - hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("GetBuffer for RTV failed: 0x%08X", hr); - return false; - } - - D3D12::Texture tex; - if (!tex.Adopt(std::move(backbuffer), DXGI_FORMAT_UNKNOWN, swap_chain_desc.BufferDesc.Format, 
DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_STATE_PRESENT)) - { - return false; - } - - m_swap_chain_buffers.push_back(std::move(tex)); - } - - m_window_info.surface_width = swap_chain_desc.BufferDesc.Width; - m_window_info.surface_height = swap_chain_desc.BufferDesc.Height; - Log_InfoPrintf("Swap chain buffer size: %ux%u", m_window_info.surface_width, m_window_info.surface_height); - - if (m_window_info.type == WindowInfo::Type::Win32) - { - BOOL fullscreen = FALSE; - DXGI_SWAP_CHAIN_DESC desc; - if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && - SUCCEEDED(m_swap_chain->GetDesc(&desc))) - { - m_window_info.surface_refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / - static_cast(desc.BufferDesc.RefreshRate.Denominator); - } - else - { - m_window_info.surface_refresh_rate = 0.0f; - } - } - - m_current_swap_chain_buffer = 0; - return true; -} - -void D3D12HostDisplay::DestroySwapChainRTVs() -{ - for (D3D12::Texture& buffer : m_swap_chain_buffers) - buffer.Destroy(false); - m_swap_chain_buffers.clear(); - m_current_swap_chain_buffer = 0; -} - -bool D3D12HostDisplay::ChangeWindow(const WindowInfo& new_wi) -{ - DestroySurface(); - - m_window_info = new_wi; - return CreateSwapChain(nullptr); -} - -void D3D12HostDisplay::DestroySurface() -{ - m_window_info.SetSurfaceless(); - - // For some reason if we don't execute the command list here, the swap chain is in use.. not sure where. - g_d3d12_context->ExecuteCommandList(true); - - if (IsFullscreen()) - SetFullscreen(false, 0, 0, 0.0f); - - DestroySwapChainRTVs(); - m_swap_chain.Reset(); -} - -void D3D12HostDisplay::ResizeWindow(s32 new_window_width, s32 new_window_height) -{ - if (!m_swap_chain) - return; - - // For some reason if we don't execute the command list here, the swap chain is in use.. not sure where. 
- g_d3d12_context->ExecuteCommandList(true); - - DestroySwapChainRTVs(); - - HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, - m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); - if (FAILED(hr)) - Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); - - if (!CreateSwapChainRTV()) - Panic("Failed to recreate swap chain RTV after resize"); -} - -bool D3D12HostDisplay::SupportsFullscreen() const -{ - return true; -} - -bool D3D12HostDisplay::IsFullscreen() -{ - BOOL is_fullscreen = FALSE; - return (m_swap_chain && SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen); -} - -bool D3D12HostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) -{ - if (!m_swap_chain) - return false; - - BOOL is_fullscreen = FALSE; - HRESULT hr = m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr); - if (!fullscreen) - { - // leaving fullscreen - if (is_fullscreen) - return SUCCEEDED(m_swap_chain->SetFullscreenState(FALSE, nullptr)); - else - return true; - } - - IDXGIOutput* output; - if (FAILED(hr = m_swap_chain->GetContainingOutput(&output))) - return false; - - DXGI_SWAP_CHAIN_DESC current_desc; - hr = m_swap_chain->GetDesc(¤t_desc); - if (FAILED(hr)) - return false; - - DXGI_MODE_DESC new_mode = current_desc.BufferDesc; - new_mode.Width = width; - new_mode.Height = height; - new_mode.RefreshRate.Numerator = static_cast(std::floor(refresh_rate * 1000.0f)); - new_mode.RefreshRate.Denominator = 1000u; - - DXGI_MODE_DESC closest_mode; - if (FAILED(hr = output->FindClosestMatchingMode(&new_mode, &closest_mode, nullptr)) || - new_mode.Format != current_desc.BufferDesc.Format) - { - Log_ErrorPrintf("Failed to find closest matching mode, hr=%08X", hr); - return false; - } - - if (new_mode.Width == current_desc.BufferDesc.Width && new_mode.Height == current_desc.BufferDesc.Width && - new_mode.RefreshRate.Numerator == current_desc.BufferDesc.RefreshRate.Numerator && - 
new_mode.RefreshRate.Denominator == current_desc.BufferDesc.RefreshRate.Denominator) - { - Log_InfoPrintf("Fullscreen mode already set"); - return true; - } - - g_d3d12_context->ExecuteCommandList(true); - DestroySwapChainRTVs(); - m_swap_chain.Reset(); - - if (!CreateSwapChain(&closest_mode)) - { - Log_ErrorPrintf("Failed to create a fullscreen swap chain"); - if (!CreateSwapChain(nullptr)) - Panic("Failed to recreate windowed swap chain"); - - return false; - } - - return true; -} - -HostDisplay::AdapterAndModeList D3D12HostDisplay::GetAdapterAndModeList() -{ - return GetAdapterAndModeList(m_dxgi_factory.Get()); -} - -bool D3D12HostDisplay::CreateResources() -{ - D3D12::RootSignatureBuilder rsbuilder; - rsbuilder.Add32BitConstants(0, 4, D3D12_SHADER_VISIBILITY_VERTEX); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_ALL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_ALL); - m_display_root_signature = rsbuilder.Create(); - if (!m_display_root_signature) - return false; - - rsbuilder.SetInputAssemblerFlag(); - rsbuilder.Add32BitConstants(0, FrontendCommon::PostProcessingShader::PUSH_CONSTANT_SIZE_THRESHOLD / sizeof(u32), - D3D12_SHADER_VISIBILITY_ALL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - m_post_processing_root_signature = rsbuilder.Create(); - if (!m_post_processing_root_signature) - return false; - - rsbuilder.SetInputAssemblerFlag(); - rsbuilder.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsbuilder.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - m_post_processing_cb_root_signature = rsbuilder.Create(); - if 
(!m_post_processing_cb_root_signature) - return false; - - D3D12::GraphicsPipelineBuilder gpbuilder; - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetRootSignature(m_display_root_signature.Get()); - gpbuilder.SetVertexShader(s_display_vs_bytecode, sizeof(s_display_vs_bytecode)); - gpbuilder.SetPixelShader(s_display_ps_bytecode, sizeof(s_display_ps_bytecode)); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); - m_display_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), false); - if (!m_display_pipeline) - return false; - - gpbuilder.SetPixelShader(s_display_ps_alpha_bytecode, sizeof(s_display_ps_alpha_bytecode)); - gpbuilder.SetBlendState(0, true, D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, - D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_COLOR_WRITE_ENABLE_ALL); - m_software_cursor_pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), false); - if (!m_software_cursor_pipeline) - return false; - - D3D12_SAMPLER_DESC desc = {}; - D3D12::SetDefaultSampler(&desc); - desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; - desc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - - if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_point_sampler)) - return false; - - g_d3d12_context->GetDevice()->CreateSampler(&desc, m_point_sampler.cpu_handle); - - desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - - if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_linear_sampler)) - return false; - - g_d3d12_context->GetDevice()->CreateSampler(&desc, m_linear_sampler.cpu_handle); - - if (!g_d3d12_context->GetSamplerHeapManager().Allocate(&m_border_sampler)) - return false; - - desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_BORDER; - desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_BORDER; - desc.Filter = 
D3D12_FILTER_MIN_MAG_MIP_POINT; - desc.BorderColor[0] = 0.0f; - desc.BorderColor[1] = 0.0f; - desc.BorderColor[2] = 0.0f; - desc.BorderColor[3] = 1.0f; - g_d3d12_context->GetDevice()->CreateSampler(&desc, m_border_sampler.cpu_handle); - - return true; -} - -void D3D12HostDisplay::DestroyResources() -{ - HostDisplay::DestroyResources(); - - m_post_processing_cbuffer.Destroy(false); - m_post_processing_chain.ClearStages(); - m_post_processing_input_texture.Destroy(); - m_post_processing_stages.clear(); - m_post_processing_cb_root_signature.Reset(); - m_post_processing_root_signature.Reset(); - - m_readback_staging_texture.Destroy(false); - g_d3d12_context->GetSamplerHeapManager().Free(&m_border_sampler); - g_d3d12_context->GetSamplerHeapManager().Free(&m_linear_sampler); - g_d3d12_context->GetSamplerHeapManager().Free(&m_point_sampler); - m_software_cursor_pipeline.Reset(); - m_display_pipeline.Reset(); - m_display_root_signature.Reset(); -} - -bool D3D12HostDisplay::CreateImGuiContext() -{ - ImGui::GetIO().DisplaySize.x = static_cast(m_window_info.surface_width); - ImGui::GetIO().DisplaySize.y = static_cast(m_window_info.surface_height); - - return ImGui_ImplDX12_Init(DXGI_FORMAT_R8G8B8A8_UNORM); -} - -void D3D12HostDisplay::DestroyImGuiContext() -{ - g_d3d12_context->WaitForGPUIdle(); - - ImGui_ImplDX12_Shutdown(); -} - -bool D3D12HostDisplay::UpdateImGuiFontTexture() -{ - return ImGui_ImplDX12_CreateFontsTexture(); -} - -bool D3D12HostDisplay::Render(bool skip_present) -{ - if (skip_present || !m_swap_chain) - { - if (ImGui::GetCurrentContext()) - ImGui::Render(); - - return false; - } - - D3D12::Texture& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; - m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast(m_swap_chain_buffers.size())); - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - RenderDisplay(cmdlist, &swap_chain_buf); - - if (ImGui::GetCurrentContext()) - RenderImGui(cmdlist); - - 
RenderSoftwareCursor(cmdlist); - - swap_chain_buf.TransitionToState(D3D12_RESOURCE_STATE_PRESENT); - g_d3d12_context->ExecuteCommandList(false); - - if (!m_vsync_enabled && m_using_allow_tearing) - m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); - else - m_swap_chain->Present(BoolToUInt32(m_vsync_enabled), 0); - - return true; -} - -bool D3D12HostDisplay::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - static constexpr DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; - static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; - - D3D12::Texture render_texture; - if (!render_texture.Create(width, height, 1, 1, 1, format, DXGI_FORMAT_UNKNOWN, format, DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || - !m_readback_staging_texture.EnsureSize(width, height, format, false)) - { - return false; - } - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(cmdlist, &render_texture, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, width, height); - } - else - { - render_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(render_texture.GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &render_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - - if (HasDisplayTexture()) - { - RenderDisplay(cmdlist, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, 
IsUsingLinearFiltering()); - } - } - - cmdlist->OMSetRenderTargets(0, nullptr, FALSE, nullptr); - - render_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); - m_readback_staging_texture.CopyFromTexture(render_texture, 0, 0, 0, 0, 0, width, height); - - const u32 stride = sizeof(u32) * width; - out_pixels->resize(width * height); - *out_stride = stride; - *out_format = hdformat; - - return m_readback_staging_texture.ReadPixels(0, 0, width, height, out_pixels->data(), stride); -} - -bool D3D12HostDisplay::SetGPUTimingEnabled(bool enabled) -{ - g_d3d12_context->SetEnableGPUTiming(enabled); - m_gpu_timing_enabled = enabled; - return true; -} - -float D3D12HostDisplay::GetAndResetAccumulatedGPUTime() -{ - return g_d3d12_context->GetAndResetAccumulatedGPUTime(); -} - -void D3D12HostDisplay::RenderImGui(ID3D12GraphicsCommandList* cmdlist) -{ - ImGui::Render(); - ImGui_ImplDX12_RenderDrawData(ImGui::GetDrawData()); -} - -void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist, D3D12::Texture* swap_chain_buf) -{ - const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(cmdlist, swap_chain_buf, left, top, width, height, - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - GetWindowWidth(), GetWindowHeight()); - return; - } - - swap_chain_buf->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(swap_chain_buf->GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &swap_chain_buf->GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - - if (!HasDisplayTexture()) - return; - - RenderDisplay(cmdlist, left, top, width, height, static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - 
m_display_texture_view_height, IsUsingLinearFiltering()); -} - -void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, - D3D12::Texture* texture, s32 texture_view_x, s32 texture_view_y, - s32 texture_view_width, s32 texture_view_height, bool linear_filter) -{ - const float position_adjust = linear_filter ? 0.5f : 0.0f; - const float size_adjust = linear_filter ? 1.0f : 0.0f; - const float uniforms[4] = { - (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_y) + position_adjust) / static_cast(texture->GetHeight()), - (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_height) - size_adjust) / static_cast(texture->GetHeight())}; - - cmdlist->SetGraphicsRootSignature(m_display_root_signature.Get()); - cmdlist->SetPipelineState(m_display_pipeline.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, static_cast(std::size(uniforms)), uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, texture->GetSRVDescriptor()); - cmdlist->SetGraphicsRootDescriptorTable(2, linear_filter ? 
m_linear_sampler : m_point_sampler); - - D3D12::SetViewportAndClampScissor(cmdlist, left, top, width, height); - - cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - cmdlist->DrawInstanced(3, 1, 0, 0); -} - -void D3D12HostDisplay::RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist) -{ - if (!HasSoftwareCursor()) - return; - - const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); - RenderSoftwareCursor(cmdlist, left, top, width, height, m_cursor_texture.get()); -} - -void D3D12HostDisplay::RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, - s32 height, GPUTexture* texture_handle) -{ - const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; - - cmdlist->SetGraphicsRootSignature(m_display_root_signature.Get()); - cmdlist->SetPipelineState(m_software_cursor_pipeline.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, static_cast(std::size(uniforms)), uniforms, 0); - cmdlist->SetGraphicsRootDescriptorTable(1, static_cast(texture_handle)->GetSRVDescriptor()); - cmdlist->SetGraphicsRootDescriptorTable(2, m_linear_sampler); - - D3D12::SetViewportAndClampScissor(cmdlist, left, top, width, height); - - cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - cmdlist->DrawInstanced(3, 1, 0, 0); -} - -HostDisplay::AdapterAndModeList D3D12HostDisplay::StaticGetAdapterAndModeList() -{ - ComPtr dxgi_factory; - HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(dxgi_factory.GetAddressOf())); - if (FAILED(hr)) - return {}; - - return GetAdapterAndModeList(dxgi_factory.Get()); -} - -HostDisplay::AdapterAndModeList D3D12HostDisplay::GetAdapterAndModeList(IDXGIFactory* dxgi_factory) -{ - AdapterAndModeList adapter_info; - ComPtr current_adapter; - while (SUCCEEDED(dxgi_factory->EnumAdapters(static_cast(adapter_info.adapter_names.size()), - current_adapter.ReleaseAndGetAddressOf()))) - { - DXGI_ADAPTER_DESC adapter_desc; - std::string adapter_name; - if 
(SUCCEEDED(current_adapter->GetDesc(&adapter_desc))) - { - char adapter_name_buffer[128]; - const int name_length = WideCharToMultiByte(CP_UTF8, 0, adapter_desc.Description, - static_cast(std::wcslen(adapter_desc.Description)), - adapter_name_buffer, countof(adapter_name_buffer), 0, nullptr); - if (name_length >= 0) - adapter_name.assign(adapter_name_buffer, static_cast(name_length)); - else - adapter_name.assign("(Unknown)"); - } - else - { - adapter_name.assign("(Unknown)"); - } - - if (adapter_info.fullscreen_modes.empty()) - { - ComPtr output; - if (SUCCEEDED(current_adapter->EnumOutputs(0, &output))) - { - UINT num_modes = 0; - if (SUCCEEDED(output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, nullptr))) - { - std::vector modes(num_modes); - if (SUCCEEDED(output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, modes.data()))) - { - for (const DXGI_MODE_DESC& mode : modes) - { - adapter_info.fullscreen_modes.push_back(StringUtil::StdStringFromFormat( - "%u x %u @ %f hz", mode.Width, mode.Height, - static_cast(mode.RefreshRate.Numerator) / static_cast(mode.RefreshRate.Denominator))); - } - } - } - } - } - - // handle duplicate adapter names - if (std::any_of(adapter_info.adapter_names.begin(), adapter_info.adapter_names.end(), - [&adapter_name](const std::string& other) { return (adapter_name == other); })) - { - std::string original_adapter_name = std::move(adapter_name); - - u32 current_extra = 2; - do - { - adapter_name = StringUtil::StdStringFromFormat("%s (%u)", original_adapter_name.c_str(), current_extra); - current_extra++; - } while (std::any_of(adapter_info.adapter_names.begin(), adapter_info.adapter_names.end(), - [&adapter_name](const std::string& other) { return (adapter_name == other); })); - } - - adapter_info.adapter_names.push_back(std::move(adapter_name)); - } - - return adapter_info; -} - -D3D12HostDisplay::PostProcessingStage::PostProcessingStage(PostProcessingStage&& move) - : 
pipeline(std::move(move.pipeline)), output_texture(std::move(move.output_texture)), - uniforms_size(move.uniforms_size) -{ - move.uniforms_size = 0; -} - -D3D12HostDisplay::PostProcessingStage::~PostProcessingStage() -{ - output_texture.Destroy(true); -} - -bool D3D12HostDisplay::SetPostProcessingChain(const std::string_view& config) -{ - g_d3d12_context->ExecuteCommandList(true); - - if (config.empty()) - { - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return true; - } - - if (!m_post_processing_chain.CreateFromString(config)) - return false; - - m_post_processing_stages.clear(); - - D3D12::ShaderCache shader_cache; - shader_cache.Open(EmuFolders::Cache, g_d3d12_context->GetFeatureLevel(), g_settings.gpu_use_debug_device); - - FrontendCommon::PostProcessingShaderGen shadergen(RenderAPI::D3D12, false); - bool only_use_push_constants = true; - - for (u32 i = 0; i < m_post_processing_chain.GetStageCount(); i++) - { - const FrontendCommon::PostProcessingShader& shader = m_post_processing_chain.GetShaderStage(i); - const std::string vs = shadergen.GeneratePostProcessingVertexShader(shader); - const std::string ps = shadergen.GeneratePostProcessingFragmentShader(shader); - const bool use_push_constants = shader.UsePushConstants(); - only_use_push_constants &= use_push_constants; - - PostProcessingStage stage; - stage.uniforms_size = shader.GetUniformsSize(); - - ComPtr vs_blob(shader_cache.GetVertexShader(vs)); - ComPtr ps_blob(shader_cache.GetPixelShader(ps)); - if (!vs_blob || !ps_blob) - { - Log_ErrorPrintf("Failed to compile one or more post-processing shaders, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - D3D12::GraphicsPipelineBuilder gpbuilder; - gpbuilder.SetVertexShader(vs_blob.Get()); - gpbuilder.SetPixelShader(ps_blob.Get()); - gpbuilder.SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE); - gpbuilder.SetNoCullRasterizationState(); - 
gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetRootSignature(use_push_constants ? m_post_processing_root_signature.Get() : - m_post_processing_cb_root_signature.Get()); - gpbuilder.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); - - stage.pipeline = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache); - if (!stage.pipeline) - { - Log_ErrorPrintf("Failed to compile one or more post-processing pipelines, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - D3D12::SetObjectNameFormatted(stage.pipeline.Get(), "%s Pipeline", shader.GetName().c_str()); - - m_post_processing_stages.push_back(std::move(stage)); - } - - constexpr u32 UBO_SIZE = 1 * 1024 * 1024; - if (!only_use_push_constants && m_post_processing_cbuffer.GetSize() < UBO_SIZE) - { - if (!m_post_processing_cbuffer.Create(UBO_SIZE)) - { - Log_ErrorPrintf("Failed to allocate %u byte constant buffer for postprocessing", UBO_SIZE); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - D3D12::SetObjectName(m_post_processing_cbuffer.GetBuffer(), "Post Processing Uniform Buffer"); - } - - m_post_processing_timer.Reset(); - return true; -} - -bool D3D12HostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 target_height) -{ - DebugAssert(!m_post_processing_stages.empty()); - - const DXGI_FORMAT tex_format = DXGI_FORMAT_R8G8B8A8_UNORM; - const DXGI_FORMAT srv_format = DXGI_FORMAT_R8G8B8A8_UNORM; - const DXGI_FORMAT rtv_format = DXGI_FORMAT_R8G8B8A8_UNORM; - - if (m_post_processing_input_texture.GetWidth() != target_width || - m_post_processing_input_texture.GetHeight() != target_height) - { - if (!m_post_processing_input_texture.Create(target_width, target_height, 1, 1, 1, tex_format, srv_format, - rtv_format, DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) - { - return false; - } - 
D3D12::SetObjectName(m_post_processing_input_texture.GetResource(), "Post Processing Input Texture"); - } - - const u32 target_count = (static_cast(m_post_processing_stages.size()) - 1); - for (u32 i = 0; i < target_count; i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) - { - if (!pps.output_texture.Create(target_width, target_height, 1, 1, 1, tex_format, srv_format, rtv_format, - DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) - { - return false; - } - D3D12::SetObjectNameFormatted(pps.output_texture.GetResource(), "Post Processing Output Texture %u", i); - } - } - - return true; -} - -void D3D12HostDisplay::ApplyPostProcessingChain(ID3D12GraphicsCommandList* cmdlist, D3D12::Texture* final_target, - s32 final_left, s32 final_top, s32 final_width, s32 final_height, - D3D12::Texture* texture, s32 texture_view_x, s32 texture_view_y, - s32 texture_view_width, s32 texture_view_height, u32 target_width, - u32 target_height) -{ - if (!CheckPostProcessingRenderTargets(target_width, target_height)) - { - final_target->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(final_target->GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &final_target->GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - - RenderDisplay(cmdlist, final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - return; - } - - // downsample/upsample - use same viewport for remainder - m_post_processing_input_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(m_post_processing_input_texture.GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, - nullptr); - cmdlist->OMSetRenderTargets(1, 
&m_post_processing_input_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - RenderDisplay(cmdlist, final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - m_post_processing_input_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - const s32 orig_texture_width = texture_view_width; - const s32 orig_texture_height = texture_view_height; - texture = &m_post_processing_input_texture; - texture_view_x = final_left; - texture_view_y = final_top; - texture_view_width = final_width; - texture_view_height = final_height; - - const u32 final_stage = static_cast(m_post_processing_stages.size()) - 1u; - for (u32 i = 0; i < static_cast(m_post_processing_stages.size()); i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - - const bool use_push_constants = m_post_processing_chain.GetShaderStage(i).UsePushConstants(); - if (use_push_constants) - { - u8 buffer[FrontendCommon::PostProcessingShader::PUSH_CONSTANT_SIZE_THRESHOLD]; - Assert(pps.uniforms_size <= sizeof(buffer)); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - buffer, texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, - texture_view_height, GetWindowWidth(), GetWindowHeight(), orig_texture_width, orig_texture_height, - static_cast(m_post_processing_timer.GetTimeSeconds())); - - cmdlist->SetGraphicsRootSignature(m_post_processing_root_signature.Get()); - cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(buffer) / sizeof(u32), buffer, 0); - } - else - { - if (!m_post_processing_cbuffer.ReserveMemory(pps.uniforms_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) - { - Panic("Failed to reserve space in post-processing UBO"); - } - - const u32 offset = m_post_processing_cbuffer.GetCurrentOffset(); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - 
m_post_processing_cbuffer.GetCurrentHostPointer(), texture->GetWidth(), texture->GetHeight(), texture_view_x, - texture_view_y, texture_view_width, texture_view_height, GetWindowWidth(), GetWindowHeight(), - orig_texture_width, orig_texture_height, static_cast(m_post_processing_timer.GetTimeSeconds())); - m_post_processing_cbuffer.CommitMemory(pps.uniforms_size); - - cmdlist->SetGraphicsRootSignature(m_post_processing_cb_root_signature.Get()); - cmdlist->SetGraphicsRootConstantBufferView(0, m_post_processing_cbuffer.GetGPUPointer() + offset); - } - - D3D12::Texture* rt = (i != final_stage) ? &pps.output_texture : final_target; - rt->TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(rt->GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &rt->GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - - cmdlist->SetPipelineState(pps.pipeline.Get()); - cmdlist->SetGraphicsRootDescriptorTable(1, texture->GetSRVDescriptor()); - cmdlist->SetGraphicsRootDescriptorTable(2, m_border_sampler); - - cmdlist->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - cmdlist->DrawInstanced(3, 1, 0, 0); - - if (i != final_stage) - { - pps.output_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - texture = &pps.output_texture; - } - } -} diff --git a/src/util/d3d12_host_display.h b/src/util/d3d12_host_display.h deleted file mode 100644 index b683ec23f..000000000 --- a/src/util/d3d12_host_display.h +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/d3d12/descriptor_heap_manager.h" -#include "common/d3d12/staging_texture.h" -#include "common/d3d12/stream_buffer.h" -#include "common/d3d12/texture.h" -#include "common/timer.h" -#include "common/window_info.h" -#include "common/windows_headers.h" -#include "host_display.h" -#include "postprocessing_chain.h" 
-#include -#include -#include -#include -#include -#include -#include - -class D3D12HostDisplay final : public HostDisplay -{ -public: - template - using ComPtr = Microsoft::WRL::ComPtr; - - D3D12HostDisplay(); - ~D3D12HostDisplay(); - - RenderAPI GetRenderAPI() const override; - void* GetDevice() const override; - void* GetContext() const override; - - bool HasDevice() const override; - bool HasSurface() const override; - - bool CreateDevice(const WindowInfo& wi, bool vsync) override; - bool SetupDevice() override; - - bool MakeCurrent() override; - bool DoneCurrent() override; - - bool ChangeWindow(const WindowInfo& new_wi) override; - void ResizeWindow(s32 new_window_width, s32 new_window_height) override; - bool SupportsFullscreen() const override; - bool IsFullscreen() override; - bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; - AdapterAndModeList GetAdapterAndModeList() override; - void DestroySurface() override; - - bool SetPostProcessingChain(const std::string_view& config) override; - - std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, u32 data_stride, - bool dynamic = false) override; - bool BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) override; - void EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) override; - bool UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; - bool SupportsTextureFormat(GPUTexture::Format format) const override; - - bool GetHostRefreshRate(float* refresh_rate) override; - - void SetVSync(bool enabled) override; - - bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, 
std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; - - bool SetGPUTimingEnabled(bool enabled) override; - float GetAndResetAccumulatedGPUTime() override; - - static AdapterAndModeList StaticGetAdapterAndModeList(); - -protected: - struct PostProcessingStage - { - PostProcessingStage() = default; - PostProcessingStage(PostProcessingStage&& move); - ~PostProcessingStage(); - - ComPtr pipeline; - D3D12::Texture output_texture; - u32 uniforms_size = 0; - }; - - static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory); - - virtual bool CreateResources() override; - virtual void DestroyResources() override; - - virtual bool CreateImGuiContext() override; - virtual void DestroyImGuiContext() override; - virtual bool UpdateImGuiFontTexture() override; - - bool CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode); - bool CreateSwapChainRTV(); - void DestroySwapChainRTVs(); - - void RenderDisplay(ID3D12GraphicsCommandList* cmdlist, D3D12::Texture* swap_chain_buf); - void RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist); - void RenderImGui(ID3D12GraphicsCommandList* cmdlist); - - void RenderDisplay(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, - D3D12::Texture* texture, s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, bool linear_filter); - void RenderSoftwareCursor(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, - GPUTexture* texture_handle); - - bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height); - void ApplyPostProcessingChain(ID3D12GraphicsCommandList* cmdlist, D3D12::Texture* final_target, s32 final_left, - s32 final_top, s32 final_width, s32 final_height, D3D12::Texture* texture, - s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, - u32 target_width, u32 target_height); - - ComPtr m_dxgi_factory; - ComPtr m_swap_chain; - std::vector 
m_swap_chain_buffers; - u32 m_current_swap_chain_buffer = 0; - - ComPtr m_display_root_signature; - ComPtr m_display_pipeline; - ComPtr m_software_cursor_pipeline; - D3D12::DescriptorHandle m_point_sampler; - D3D12::DescriptorHandle m_linear_sampler; - D3D12::DescriptorHandle m_border_sampler; - - D3D12::Texture m_display_pixels_texture; - D3D12::StagingTexture m_readback_staging_texture; - - ComPtr m_post_processing_root_signature; - ComPtr m_post_processing_cb_root_signature; - FrontendCommon::PostProcessingChain m_post_processing_chain; - D3D12::StreamBuffer m_post_processing_cbuffer; - D3D12::Texture m_post_processing_input_texture; - std::vector m_post_processing_stages; - Common::Timer m_post_processing_timer; - - bool m_allow_tearing_supported = false; - bool m_using_allow_tearing = false; -}; diff --git a/src/util/d3d12_pipeline.cpp b/src/util/d3d12_pipeline.cpp new file mode 100644 index 000000000..c5362df0b --- /dev/null +++ b/src/util/d3d12_pipeline.cpp @@ -0,0 +1,251 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d12_pipeline.h" +#include "d3d12_builders.h" +#include "d3d12_device.h" +#include "d3d_common.h" + +#include "common/assert.h" +#include "common/log.h" +#include "common/sha1_digest.h" +#include "common/string_util.h" + +#include + +Log_SetChannel(D3D12Device); + +static u32 s_next_bad_shader_id = 1; + +D3D12Shader::D3D12Shader(GPUShaderStage stage, Bytecode bytecode) : GPUShader(stage), m_bytecode(std::move(bytecode)) +{ +} + +D3D12Shader::~D3D12Shader() = default; + +void D3D12Shader::SetDebugName(const std::string_view& name) +{ +} + +std::unique_ptr D3D12Device::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) +{ + // Can't do much at this point. 
+ std::vector bytecode(data.begin(), data.end()); + return std::unique_ptr(new D3D12Shader(stage, std::move(bytecode))); +} + +std::unique_ptr D3D12Device::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, + DynamicHeapArray* out_binary) +{ + std::optional> bytecode = + D3DCommon::CompileShader(m_feature_level, m_debug_device, stage, source, entry_point); + if (!bytecode.has_value()) + return {}; + + std::unique_ptr ret = CreateShaderFromBinary(stage, bytecode.value()); + if (ret && out_binary) + *out_binary = std::move(bytecode.value()); + + return ret; +} + +////////////////////////////////////////////////////////////////////////// + +D3D12Pipeline::D3D12Pipeline(Microsoft::WRL::ComPtr pipeline, Layout layout, + D3D12_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_constants) + : GPUPipeline(), m_pipeline(std::move(pipeline)), m_layout(layout), m_topology(topology), + m_vertex_stride(vertex_stride), m_blend_constants(blend_constants), + m_blend_constants_f(GPUDevice::RGBA8ToFloat(blend_constants)) +{ +} + +D3D12Pipeline::~D3D12Pipeline() +{ + D3D12Device::GetInstance().DeferObjectDestruction(std::move(m_pipeline)); +} + +void D3D12Pipeline::SetDebugName(const std::string_view& name) +{ + D3D12::SetObjectName(m_pipeline.Get(), name); +} + +std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config) +{ + SHA1Digest hash; + hash.Update(&config.layout, sizeof(config.layout)); + hash.Update(&config.primitive, sizeof(config.primitive)); + if (!config.input_layout.vertex_attributes.empty()) + { + hash.Update(config.input_layout.vertex_attributes.data(), + sizeof(VertexAttribute) * static_cast(config.input_layout.vertex_attributes.size())); + hash.Update(&config.input_layout.vertex_stride, sizeof(config.input_layout.vertex_stride)); + } + hash.Update(&config.rasterization.key, sizeof(config.rasterization.key)); + hash.Update(&config.depth.key, sizeof(config.depth.key)); + 
hash.Update(&config.blend.key, sizeof(config.blend.key)); + if (const D3D12Shader* shader = static_cast(config.vertex_shader)) + hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize()); + if (const D3D12Shader* shader = static_cast(config.fragment_shader)) + hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize()); + hash.Update(&config.color_format, sizeof(config.color_format)); + hash.Update(&config.depth_format, sizeof(config.depth_format)); + hash.Update(&config.samples, sizeof(config.samples)); + hash.Update(&config.per_sample_shading, sizeof(config.per_sample_shading)); + + u8 digest[SHA1Digest::DIGEST_SIZE]; + hash.Final(digest); + return SHA1Digest::DigestToString(digest); +} + +std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config) +{ + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = + {{ + D3D_PRIMITIVE_TOPOLOGY_POINTLIST, // Points + D3D_PRIMITIVE_TOPOLOGY_LINELIST, // Lines + D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, // Triangles + D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, // TriangleStrips + }}; + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> + primitive_types = {{ + D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, // Points + D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, // Lines + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // Triangles + D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // TriangleStrips + }}; + + static constexpr u32 MAX_COMPONENTS = 4; + static constexpr const DXGI_FORMAT + format_mapping[static_cast(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT}, // Float + {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UINT}, // UInt8 + {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_SINT}, // SInt8 + {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_UNKNOWN, 
DXGI_FORMAT_R8G8B8A8_UNORM}, // UNorm8 + {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R16G16B16A16_UINT}, // UInt16 + {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R16G16B16A16_SINT}, // SInt16 + {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R16G16B16A16_UNORM}, // UNorm16 + {DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32A32_UINT}, // UInt32 + {DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32A32_SINT}, // SInt32 + }; + + static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ + D3D12_CULL_MODE_NONE, // None + D3D12_CULL_MODE_FRONT, // Front + D3D12_CULL_MODE_BACK, // Back + }}; + + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> + compare_mapping = {{ + D3D12_COMPARISON_FUNC_NEVER, // Never + D3D12_COMPARISON_FUNC_ALWAYS, // Always + D3D12_COMPARISON_FUNC_LESS, // Less + D3D12_COMPARISON_FUNC_LESS_EQUAL, // LessEqual + D3D12_COMPARISON_FUNC_GREATER, // Greater + D3D12_COMPARISON_FUNC_GREATER_EQUAL, // GreaterEqual + D3D12_COMPARISON_FUNC_EQUAL, // Equal + }}; + + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + D3D12_BLEND_ZERO, // Zero + D3D12_BLEND_ONE, // One + D3D12_BLEND_SRC_COLOR, // SrcColor + D3D12_BLEND_INV_SRC_COLOR, // InvSrcColor + D3D12_BLEND_DEST_COLOR, // DstColor + D3D12_BLEND_INV_DEST_COLOR, // InvDstColor + D3D12_BLEND_SRC_ALPHA, // SrcAlpha + D3D12_BLEND_INV_SRC_ALPHA, // InvSrcAlpha + D3D12_BLEND_SRC1_ALPHA, // SrcAlpha1 + D3D12_BLEND_INV_SRC1_ALPHA, // InvSrcAlpha1 + D3D12_BLEND_DEST_ALPHA, // DstAlpha + D3D12_BLEND_INV_DEST_ALPHA, // InvDstAlpha + D3D12_BLEND_BLEND_FACTOR, // ConstantColor + D3D12_BLEND_INV_BLEND_FACTOR, // InvConstantColor + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + D3D12_BLEND_OP_ADD, // Add + D3D12_BLEND_OP_SUBTRACT, 
// Subtract + D3D12_BLEND_OP_REV_SUBTRACT, // ReverseSubtract + D3D12_BLEND_OP_MIN, // Min + D3D12_BLEND_OP_MAX, // Max + }}; + + D3D12::GraphicsPipelineBuilder gpb; + gpb.SetRootSignature(m_root_signatures[static_cast(config.layout)].Get()); + gpb.SetVertexShader(static_cast(config.vertex_shader)->GetBytecodeData(), + static_cast(config.vertex_shader)->GetBytecodeSize()); + gpb.SetPixelShader(static_cast(config.fragment_shader)->GetBytecodeData(), + static_cast(config.fragment_shader)->GetBytecodeSize()); + gpb.SetPrimitiveTopologyType(primitive_types[static_cast(config.primitive)]); + + if (!config.input_layout.vertex_attributes.empty()) + { + for (u32 i = 0; i < static_cast(config.input_layout.vertex_attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; + DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); + gpb.AddVertexAttribute( + "ATTR", i, format_mapping[static_cast(va.type.GetValue())][static_cast(va.components.GetValue() - 1)], + 0, va.offset); + } + } + + gpb.SetRasterizationState(D3D12_FILL_MODE_SOLID, + cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())], false); + if (config.samples > 1) + gpb.SetMultisamples(config.samples); + gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write, + config.depth.depth_write, compare_mapping[static_cast(config.depth.depth_test.GetValue())]); + gpb.SetNoStencilState(); + + gpb.SetBlendState(0, config.blend.enable, blend_mapping[static_cast(config.blend.src_blend.GetValue())], + blend_mapping[static_cast(config.blend.dst_blend.GetValue())], + op_mapping[static_cast(config.blend.blend_op.GetValue())], + blend_mapping[static_cast(config.blend.src_alpha_blend.GetValue())], + blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())], + op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())], config.blend.write_mask); + + if (config.color_format != 
GPUTexture::Format::Unknown) + gpb.SetRenderTarget(0, D3DCommon::GetFormatMapping(config.color_format).rtv_format); + + if (config.depth_format != GPUTexture::Format::Unknown) + gpb.SetDepthStencilFormat(D3DCommon::GetFormatMapping(config.depth_format).dsv_format); + + ComPtr pipeline; + if (m_pipeline_library) + { + const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config)); + HRESULT hr = + m_pipeline_library->LoadGraphicsPipeline(name.c_str(), gpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf())); + if (FAILED(hr)) + { + // E_INVALIDARG = not found. + if (hr != E_INVALIDARG) + Log_ErrorPrintf("LoadGraphicsPipeline() failed with HRESULT %08X", hr); + + // Need to create it normally. + pipeline = gpb.Create(m_device.Get(), false); + + // Store if it wasn't an OOM or something else. + if (pipeline && hr == E_INVALIDARG) + { + hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get()); + if (FAILED(hr)) + Log_ErrorPrintf("StorePipeline() failed with HRESULT %08X", hr); + } + } + } + else + { + pipeline = gpb.Create(m_device.Get(), false); + } + + if (!pipeline) + return {}; + + return std::unique_ptr(new D3D12Pipeline( + pipeline, config.layout, primitives[static_cast(config.primitive)], + config.input_layout.vertex_attributes.empty() ? 
0 : config.input_layout.vertex_stride, config.blend.constant)); +} diff --git a/src/util/d3d12_pipeline.h b/src/util/d3d12_pipeline.h new file mode 100644 index 000000000..68f99fdd8 --- /dev/null +++ b/src/util/d3d12_pipeline.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gpu_device.h" + +#include "common/windows_headers.h" + +#include +#include +#include + +class D3D12Device; + +class D3D12Shader final : public GPUShader +{ + friend D3D12Device; + +public: + using Bytecode = std::vector; + + ~D3D12Shader() override; + + ALWAYS_INLINE const Bytecode& GetBytecode() const { return m_bytecode; } + ALWAYS_INLINE D3D12_SHADER_BYTECODE GetD3DBytecode() const { return {m_bytecode.data(), m_bytecode.size()}; } + ALWAYS_INLINE const u8* GetBytecodeData() const { return m_bytecode.data(); } + ALWAYS_INLINE u32 GetBytecodeSize() const { return static_cast(m_bytecode.size()); } + + void SetDebugName(const std::string_view& name) override; + +private: + D3D12Shader(GPUShaderStage stage, Bytecode bytecode); + + Bytecode m_bytecode; +}; + +class D3D12Pipeline final : public GPUPipeline +{ + friend D3D12Device; + +public: + ~D3D12Pipeline() override; + + ALWAYS_INLINE ID3D12PipelineState* GetPipeline() const { return m_pipeline.Get(); } + ALWAYS_INLINE Layout GetLayout() const { return m_layout; } + ALWAYS_INLINE D3D12_PRIMITIVE_TOPOLOGY GetTopology() const { return m_topology; } + ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; } + ALWAYS_INLINE u32 GetBlendConstants() const { return m_blend_constants; } + ALWAYS_INLINE const std::array& GetBlendConstantsF() const { return m_blend_constants_f; } + ALWAYS_INLINE bool HasVertexStride() const { return (m_vertex_stride > 0); } + + void SetDebugName(const std::string_view& name) override; + + static std::string GetPipelineName(const GraphicsConfig& config); + +private: + D3D12Pipeline(Microsoft::WRL::ComPtr pipeline, 
Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology, + u32 vertex_stride, u32 blend_constants); + + Microsoft::WRL::ComPtr m_pipeline; + Layout m_layout; + D3D12_PRIMITIVE_TOPOLOGY m_topology; + u32 m_vertex_stride; + u32 m_blend_constants; + std::array m_blend_constants_f; +}; diff --git a/src/common/d3d12/stream_buffer.cpp b/src/util/d3d12_stream_buffer.cpp similarity index 79% rename from src/common/d3d12/stream_buffer.cpp rename to src/util/d3d12_stream_buffer.cpp index 18b82f013..c2e3f63ee 100644 --- a/src/common/d3d12/stream_buffer.cpp +++ b/src/util/d3d12_stream_buffer.cpp @@ -1,57 +1,67 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -// Parts originally from Dolphin Emulator, also written by myself. -#include "stream_buffer.h" -#include "../align.h" -#include "../assert.h" -#include "../log.h" -#include "context.h" +#include "d3d12_stream_buffer.h" +#include "d3d12_device.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/log.h" + +#include "D3D12MemAlloc.h" + #include -#include -Log_SetChannel(D3D12::StreamBuffer); -namespace D3D12 { -StreamBuffer::StreamBuffer() = default; +Log_SetChannel(D3D12StreamBuffer); -StreamBuffer::~StreamBuffer() +D3D12StreamBuffer::D3D12StreamBuffer() = default; + +D3D12StreamBuffer::~D3D12StreamBuffer() { Destroy(); } -bool StreamBuffer::Create(u32 size) +bool D3D12StreamBuffer::Create(u32 size) { - static const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_UPLOAD}; const D3D12_RESOURCE_DESC resource_desc = { D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE}; - Microsoft::WRL::ComPtr buffer; + D3D12MA::ALLOCATION_DESC allocationDesc = {}; + allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED; + allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD; - HRESULT hr = 
g_d3d12_context->GetDevice()->CreateCommittedResource(&heap_properties, D3D12_HEAP_FLAG_NONE, - &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, IID_PPV_ARGS(buffer.GetAddressOf())); - AssertMsg(SUCCEEDED(hr), "Allocate buffer"); + Microsoft::WRL::ComPtr buffer; + Microsoft::WRL::ComPtr allocation; + HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( + &allocationDesc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.ReleaseAndGetAddressOf(), + IID_PPV_ARGS(buffer.GetAddressOf())); if (FAILED(hr)) + { + Log_ErrorPrintf("CreateResource() failed: %08X", hr); return false; + } static const D3D12_RANGE read_range = {}; u8* host_pointer; hr = buffer->Map(0, &read_range, reinterpret_cast(&host_pointer)); - AssertMsg(SUCCEEDED(hr), "Map buffer"); if (FAILED(hr)) + { + Log_ErrorPrintf("Map() failed: %08X", hr); return false; + } Destroy(true); m_buffer = std::move(buffer); + m_allocation = std::move(allocation); m_host_pointer = host_pointer; m_size = size; m_gpu_pointer = m_buffer->GetGPUVirtualAddress(); return true; } -bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) +bool D3D12StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) { const u32 required_bytes = num_bytes + alignment; @@ -120,15 +130,15 @@ bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) return false; } -void StreamBuffer::CommitMemory(u32 final_num_bytes) +void D3D12StreamBuffer::CommitMemory(u32 final_num_bytes) { - Assert((m_current_offset + final_num_bytes) <= m_size); - Assert(final_num_bytes <= m_current_space); + DebugAssert((m_current_offset + final_num_bytes) <= m_size); + DebugAssert(final_num_bytes <= m_current_space); m_current_offset += final_num_bytes; m_current_space -= final_num_bytes; } -void StreamBuffer::Destroy(bool defer) +void D3D12StreamBuffer::Destroy(bool defer) { if (m_host_pointer) { @@ -138,8 +148,9 @@ void StreamBuffer::Destroy(bool defer) } if (m_buffer && defer) - 
g_d3d12_context->DeferResourceDestruction(m_buffer.Get()); + D3D12Device::GetInstance().DeferResourceDestruction(std::move(m_allocation), std::move(m_buffer)); m_buffer.Reset(); + m_allocation.Reset(); m_current_offset = 0; m_current_space = 0; @@ -147,14 +158,14 @@ void StreamBuffer::Destroy(bool defer) m_tracked_fences.clear(); } -void StreamBuffer::UpdateCurrentFencePosition() +void D3D12StreamBuffer::UpdateCurrentFencePosition() { // Don't create a tracking entry if the GPU is caught up with the buffer. if (m_current_offset == m_current_gpu_position) return; // Has the offset changed since the last fence? - const u64 fence = g_d3d12_context->GetCurrentFenceValue(); + const u64 fence = D3D12Device::GetInstance().GetCurrentFenceValue(); if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence) { // Still haven't executed a command buffer, so just update the offset. @@ -166,12 +177,12 @@ void StreamBuffer::UpdateCurrentFencePosition() m_tracked_fences.emplace_back(fence, m_current_offset); } -void StreamBuffer::UpdateGPUPosition() +void D3D12StreamBuffer::UpdateGPUPosition() { auto start = m_tracked_fences.begin(); auto end = start; - const u64 completed_counter = g_d3d12_context->GetCompletedFenceValue(); + const u64 completed_counter = D3D12Device::GetInstance().GetCompletedFenceValue(); while (end != m_tracked_fences.end() && completed_counter >= end->first) { m_current_gpu_position = end->second; @@ -182,7 +193,7 @@ void StreamBuffer::UpdateGPUPosition() m_tracked_fences.erase(start, end); } -bool StreamBuffer::WaitForClearSpace(u32 num_bytes) +bool D3D12StreamBuffer::WaitForClearSpace(u32 num_bytes) { u32 new_offset = 0; u32 new_space = 0; @@ -249,16 +260,14 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes) // Did any fences satisfy this condition? // Has the command buffer been executed yet? If not, the caller should execute it. 
- if (iter == m_tracked_fences.end() || iter->first == g_d3d12_context->GetCurrentFenceValue()) + if (iter == m_tracked_fences.end() || iter->first == D3D12Device::GetInstance().GetCurrentFenceValue()) return false; // Wait until this fence is signaled. This will fire the callback, updating the GPU position. - g_d3d12_context->WaitForFence(iter->first); + D3D12Device::GetInstance().WaitForFence(iter->first); m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); m_current_offset = new_offset; m_current_space = new_space; m_current_gpu_position = new_gpu_position; return true; } - -} // namespace D3D12 diff --git a/src/common/d3d12/stream_buffer.h b/src/util/d3d12_stream_buffer.h similarity index 84% rename from src/common/d3d12/stream_buffer.h rename to src/util/d3d12_stream_buffer.h index 1655f6a30..314cb746c 100644 --- a/src/common/d3d12/stream_buffer.h +++ b/src/util/d3d12_stream_buffer.h @@ -1,22 +1,25 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -// Parts originally from Dolphin Emulator, also written by myself. 
#pragma once -#include "../types.h" -#include "../windows_headers.h" +#include "common/types.h" +#include "common/windows_headers.h" + #include #include #include #include -namespace D3D12 { -class StreamBuffer +namespace D3D12MA { +class Allocation; +} + +class D3D12StreamBuffer { public: - StreamBuffer(); - ~StreamBuffer(); + D3D12StreamBuffer(); + ~D3D12StreamBuffer(); bool Create(u32 size); @@ -48,11 +51,10 @@ private: u32 m_current_gpu_position = 0; Microsoft::WRL::ComPtr m_buffer; + Microsoft::WRL::ComPtr m_allocation; D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {}; u8* m_host_pointer = nullptr; // List of fences and the corresponding positions in the buffer std::deque> m_tracked_fences; }; - -} // namespace D3D12 diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp new file mode 100644 index 000000000..4417456a0 --- /dev/null +++ b/src/util/d3d12_texture.cpp @@ -0,0 +1,957 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "d3d12_texture.h" +#include "d3d12_builders.h" +#include "d3d12_device.h" +#include "d3d_common.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/bitutils.h" +#include "common/log.h" +#include "common/string_util.h" + +#include "D3D12MemAlloc.h" + +Log_SetChannel(D3D12Device); + +D3D12Texture::D3D12Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + DXGI_FORMAT dxgi_format, ComPtr resource, + ComPtr allocation, const D3D12DescriptorHandle& srv_descriptor, + const D3D12DescriptorHandle& write_descriptor, const D3D12DescriptorHandle& uav_descriptor, + WriteDescriptorType wdtype, D3D12_RESOURCE_STATES resource_state) + : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), + static_cast(samples), type, format), + m_resource(std::move(resource)), m_allocation(std::move(allocation)), m_srv_descriptor(srv_descriptor), + 
m_write_descriptor(write_descriptor), m_uav_descriptor(uav_descriptor), m_dxgi_format(dxgi_format), + m_resource_state(resource_state), m_write_descriptor_type(wdtype) +{ +} + +D3D12Texture::~D3D12Texture() +{ + Destroy(true); +} + +std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data /* = nullptr */, u32 data_stride /* = 0 */, + bool dynamic /* = false */) +{ + if (!GPUTexture::ValidateConfig(width, height, layers, levels, samples, type, format)) + return {}; + + const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(format); + + const DXGI_FORMAT uav_format = (type == GPUTexture::Type::RWTexture) ? fm.resource_format : DXGI_FORMAT_UNKNOWN; + + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + desc.Width = width; + desc.Height = height; + desc.DepthOrArraySize = 1; + desc.MipLevels = static_cast(levels); + desc.Format = fm.resource_format; + desc.SampleDesc.Count = samples; + desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + + D3D12MA::ALLOCATION_DESC allocationDesc = {}; + allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_WITHIN_BUDGET; + allocationDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_CLEAR_VALUE optimized_clear_value = {}; + D3D12_RESOURCE_STATES state; + + switch (type) + { + case GPUTexture::Type::Texture: + { + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + state = D3D12_RESOURCE_STATE_COPY_DEST; + } + break; + + case GPUTexture::Type::RenderTarget: + { + // RT's tend to be larger, so we'll keep them committed for speed. 
+ DebugAssert(levels == 1); + allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + optimized_clear_value.Format = fm.rtv_format; + state = D3D12_RESOURCE_STATE_RENDER_TARGET; + } + break; + + case GPUTexture::Type::DepthStencil: + { + DebugAssert(levels == 1); + allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + optimized_clear_value.Format = fm.dsv_format; + state = D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + break; + + case GPUTexture::Type::RWTexture: + { + DebugAssert(levels == 1); + allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; + state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + } + break; + + default: + return {}; + } + + if (uav_format != DXGI_FORMAT_UNKNOWN) + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + ComPtr resource; + ComPtr allocation; + HRESULT hr = m_allocator->CreateResource( + &allocationDesc, &desc, state, + (type == GPUTexture::Type::RenderTarget || type == GPUTexture::Type::DepthStencil) ? &optimized_clear_value : + nullptr, + allocation.GetAddressOf(), IID_PPV_ARGS(resource.GetAddressOf())); + if (FAILED(hr)) + { + // OOM isn't fatal. 
+ if (hr != E_OUTOFMEMORY) + Log_ErrorPrintf("Create texture failed: 0x%08X", hr); + + return {}; + } + + D3D12DescriptorHandle srv_descriptor, write_descriptor, uav_descriptor; + D3D12Texture::WriteDescriptorType write_descriptor_type = D3D12Texture::WriteDescriptorType::None; + if (fm.srv_format != DXGI_FORMAT_UNKNOWN) + { + if (!CreateSRVDescriptor(resource.Get(), layers, levels, samples, fm.srv_format, &srv_descriptor)) + return {}; + } + + switch (type) + { + case GPUTexture::Type::RenderTarget: + { + write_descriptor_type = D3D12Texture::WriteDescriptorType::RTV; + if (!CreateRTVDescriptor(resource.Get(), samples, fm.rtv_format, &write_descriptor)) + { + m_descriptor_heap_manager.Free(&srv_descriptor); + return {}; + } + } + break; + + case GPUTexture::Type::DepthStencil: + { + write_descriptor_type = D3D12Texture::WriteDescriptorType::DSV; + if (!CreateDSVDescriptor(resource.Get(), samples, fm.dsv_format, &write_descriptor)) + { + m_descriptor_heap_manager.Free(&srv_descriptor); + return {}; + } + } + break; + + default: + break; + } + + if (uav_format != DXGI_FORMAT_UNKNOWN && !CreateUAVDescriptor(resource.Get(), samples, fm.dsv_format, &uav_descriptor)) + { + m_descriptor_heap_manager.Free(&write_descriptor); + m_descriptor_heap_manager.Free(&srv_descriptor); + return {}; + } + + std::unique_ptr tex(new D3D12Texture(width, height, layers, levels, samples, type, format, fm.resource_format, + std::move(resource), std::move(allocation), srv_descriptor, + write_descriptor, uav_descriptor, write_descriptor_type, state)); + + if (data) + { + tex->Update(0, 0, width, height, data, data_stride); + tex->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + return tex; +} + +bool D3D12Device::CreateSRVDescriptor(ID3D12Resource* resource, u32 layers, u32 levels, u32 samples, DXGI_FORMAT format, + D3D12DescriptorHandle* dh) +{ + if (!m_descriptor_heap_manager.Allocate(dh)) + { + Log_ErrorPrint("Failed to allocate SRV descriptor"); + return false; + } 
+ + D3D12_SHADER_RESOURCE_VIEW_DESC desc; + desc.Format = format; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + if (layers > 1) + { + if (samples > 1) + { + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + desc.Texture2DMSArray = {0u, layers}; + } + else + { + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + desc.Texture2DArray = {0u, levels, 0u, layers}; + } + } + else + { + if (samples > 1) + { + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + } + else + { + desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + desc.Texture2D = {0u, levels}; + } + } + + m_device->CreateShaderResourceView(resource, &desc, dh->cpu_handle); + return true; +} + +bool D3D12Device::CreateRTVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, + D3D12DescriptorHandle* dh) +{ + if (!m_rtv_heap_manager.Allocate(dh)) + { + Log_ErrorPrint("Failed to allocate SRV descriptor"); + return false; + } + + const D3D12_RENDER_TARGET_VIEW_DESC desc = {format, (samples > 1) ? D3D12_RTV_DIMENSION_TEXTURE2DMS : + D3D12_RTV_DIMENSION_TEXTURE2D}; + m_device->CreateRenderTargetView(resource, &desc, dh->cpu_handle); + return true; +} + +bool D3D12Device::CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, + D3D12DescriptorHandle* dh) +{ + if (!m_dsv_heap_manager.Allocate(dh)) + { + Log_ErrorPrint("Failed to allocate SRV descriptor"); + return false; + } + + const D3D12_DEPTH_STENCIL_VIEW_DESC desc = { + format, (samples > 1) ? 
D3D12_DSV_DIMENSION_TEXTURE2DMS : D3D12_DSV_DIMENSION_TEXTURE2D, D3D12_DSV_FLAG_NONE}; + m_device->CreateDepthStencilView(resource, &desc, dh->cpu_handle); + return true; +} + +bool D3D12Device::CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, + D3D12DescriptorHandle* dh) +{ + if (!m_descriptor_heap_manager.Allocate(dh)) + { + Log_ErrorPrint("Failed to allocate UAV descriptor"); + return false; + } + + DebugAssert(samples == 1); + const D3D12_UNORDERED_ACCESS_VIEW_DESC desc = {format, D3D12_UAV_DIMENSION_TEXTURE2D}; + m_device->CreateUnorderedAccessView(resource, nullptr, &desc, dh->cpu_handle); + return true; +} + +void D3D12Texture::Destroy(bool defer) +{ + D3D12Device& dev = D3D12Device::GetInstance(); + dev.UnbindTexture(this); + + if (defer) + { + dev.DeferDescriptorDestruction(dev.GetDescriptorHeapManager(), &m_srv_descriptor); + + switch (m_write_descriptor_type) + { + case WriteDescriptorType::RTV: + dev.DeferDescriptorDestruction(dev.GetRTVHeapManager(), &m_write_descriptor); + break; + case WriteDescriptorType::DSV: + dev.DeferDescriptorDestruction(dev.GetDSVHeapManager(), &m_write_descriptor); + break; + case WriteDescriptorType::None: + default: + break; + } + + if (m_uav_descriptor) + dev.DeferDescriptorDestruction(dev.GetDescriptorHeapManager(), &m_uav_descriptor); + + dev.DeferResourceDestruction(std::move(m_allocation), std::move(m_resource)); + } + else + { + dev.GetDescriptorHeapManager().Free(&m_srv_descriptor); + + switch (m_write_descriptor_type) + { + case WriteDescriptorType::RTV: + dev.GetRTVHeapManager().Free(&m_write_descriptor); + break; + case WriteDescriptorType::DSV: + dev.GetDSVHeapManager().Free(&m_write_descriptor); + break; + case WriteDescriptorType::None: + default: + break; + } + + if (m_uav_descriptor) + dev.GetDescriptorHeapManager().Free(&m_uav_descriptor); + + m_resource.Reset(); + m_allocation.Reset(); + } + + m_write_descriptor_type = WriteDescriptorType::None; +} + 
+ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate() +{ + D3D12Device& dev = D3D12Device::GetInstance(); + if (m_type != Type::Texture || m_use_fence_counter == dev.GetCurrentFenceValue()) + { + // Console.WriteLn("Texture update within frame, can't use do beforehand"); + dev.EndRenderPass(); + return dev.GetCommandList(); + } + + return dev.GetInitCommandList(); +} + +void D3D12Texture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, + u32 upload_pitch) const +{ + StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height); +} + +ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, + u32 height) const +{ + const u32 size = upload_pitch * height; + ComPtr resource; + ComPtr allocation; + + const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD}; + const D3D12_RESOURCE_DESC resource_desc = { + D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( + &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(), + IID_PPV_ARGS(resource.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateResource() failed with %08X", hr); + return nullptr; + } + + void* map_ptr; + hr = resource->Map(0, nullptr, &map_ptr); + if (FAILED(hr)) + { + Log_ErrorPrintf("Map() failed with %08X", hr); + return nullptr; + } + + CopyTextureDataForUpload(map_ptr, data, width, height, pitch, upload_pitch); + + const D3D12_RANGE write_range = {0, size}; + resource->Unmap(0, &write_range); + + // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy. + // This adds the reference needed to keep the buffer alive. 
+ ID3D12Resource* ret = resource.Get(); + D3D12Device::GetInstance().DeferResourceDestruction(std::move(allocation), std::move(resource)); + return ret; +} + +bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer, u32 level) +{ + DebugAssert(layer < m_layers && level < m_levels); + DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level)); + + D3D12Device& dev = D3D12Device::GetInstance(); + D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); + + const u32 upload_pitch = Common::AlignUpPow2(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 required_size = height * upload_pitch; + + D3D12_TEXTURE_COPY_LOCATION srcloc; + srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcloc.PlacedFootprint.Footprint.Width = width; + srcloc.PlacedFootprint.Footprint.Height = height; + srcloc.PlacedFootprint.Footprint.Depth = 1; + srcloc.PlacedFootprint.Footprint.Format = m_dxgi_format; + srcloc.PlacedFootprint.Footprint.RowPitch = upload_pitch; + + // If the texture is larger than half our streaming buffer size, use a separate buffer. + // Otherwise allocation will either fail, or require lots of cmdbuffer submissions. 
+ if (required_size > (sbuffer.GetSize() / 2)) + { + srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); + if (!srcloc.pResource) + return false; + + srcloc.PlacedFootprint.Offset = 0; + } + else + { + if (!sbuffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + D3D12Device::GetInstance().SubmitCommandList(false, "While waiting for %u bytes in texture upload buffer", + required_size); + if (!sbuffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + Log_ErrorPrintf("Failed to reserve texture upload memory (%u bytes).", required_size); + return false; + } + } + + srcloc.pResource = sbuffer.GetBuffer(); + srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset(); + CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); + sbuffer.CommitMemory(required_size); + } + + ID3D12GraphicsCommandList4* cmdlist = GetCommandBufferForUpdate(); + + // if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear + if (m_type == Type::RenderTarget) + { + if (x != 0 || y != 0 || width != m_width || height != m_height) + CommitClear(cmdlist); + else + m_state = State::Dirty; + } + + // first time the texture is used? 
don't leave it undefined + if (m_resource_state == D3D12_RESOURCE_STATE_COMMON) + TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST); + else if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) + TransitionSubresourceToState(cmdlist, layer, level, m_resource_state, D3D12_RESOURCE_STATE_COPY_DEST); + + D3D12_TEXTURE_COPY_LOCATION dstloc; + dstloc.pResource = m_resource.Get(); + dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstloc.SubresourceIndex = layer; + + const D3D12_BOX srcbox{0u, 0u, 0u, width, height, 1u}; + cmdlist->CopyTextureRegion(&dstloc, x, y, 0, &srcloc, &srcbox); + + if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) + TransitionSubresourceToState(cmdlist, layer, level, D3D12_RESOURCE_STATE_COPY_DEST, m_resource_state); + + return true; +} + +bool D3D12Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level) +{ + // TODO: linear textures for dynamic? + if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) + { + return false; + } + + D3D12Device& dev = D3D12Device::GetInstance(); + if (m_state == State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) + CommitClear(GetCommandBufferForUpdate()); + + // see note in Update() for the reason why. 
+ const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 req_size = height * aligned_pitch; + D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer(); + if (req_size >= (buffer.GetSize() / 2)) + return false; + + if (!buffer.ReserveMemory(req_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + { + dev.SubmitCommandList(false, "While waiting for %u bytes in texture upload buffer", req_size); + if (!buffer.ReserveMemory(req_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)) + Panic("Failed to reserve texture upload memory"); + } + + // map for writing + *map = buffer.GetCurrentHostPointer(); + *map_stride = aligned_pitch; + m_map_x = static_cast(x); + m_map_y = static_cast(y); + m_map_width = static_cast(width); + m_map_height = static_cast(height); + m_map_layer = static_cast(layer); + m_map_level = static_cast(level); + m_state = State::Dirty; + return true; +} + +void D3D12Texture::Unmap() +{ + D3D12Device& dev = D3D12Device::GetInstance(); + D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer(); + const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 req_size = m_map_height * aligned_pitch; + const u32 offset = sb.GetCurrentOffset(); + sb.CommitMemory(req_size); + + ID3D12GraphicsCommandList4* cmdlist = GetCommandBufferForUpdate(); + + // first time the texture is used? 
don't leave it undefined + if (m_resource_state == D3D12_RESOURCE_STATE_COMMON) + TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST); + else if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) + TransitionSubresourceToState(cmdlist, m_map_layer, m_map_level, m_resource_state, D3D12_RESOURCE_STATE_COPY_DEST); + + D3D12_TEXTURE_COPY_LOCATION srcloc; + srcloc.pResource = sb.GetBuffer(); + srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcloc.PlacedFootprint.Offset = offset; + srcloc.PlacedFootprint.Footprint.Width = m_map_width; + srcloc.PlacedFootprint.Footprint.Height = m_map_height; + srcloc.PlacedFootprint.Footprint.Depth = 1; + srcloc.PlacedFootprint.Footprint.Format = m_dxgi_format; + srcloc.PlacedFootprint.Footprint.RowPitch = aligned_pitch; + + D3D12_TEXTURE_COPY_LOCATION dstloc; + dstloc.pResource = m_resource.Get(); + dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstloc.SubresourceIndex = m_map_level; + + const D3D12_BOX srcbox{0u, 0u, 0u, m_map_width, m_map_height, 1}; + cmdlist->CopyTextureRegion(&dstloc, m_map_x, m_map_y, 0, &srcloc, &srcbox); + + if (m_resource_state != D3D12_RESOURCE_STATE_COPY_DEST) + TransitionSubresourceToState(cmdlist, m_map_layer, m_map_level, D3D12_RESOURCE_STATE_COPY_DEST, m_resource_state); + + m_map_x = 0; + m_map_y = 0; + m_map_width = 0; + m_map_height = 0; + m_map_layer = 0; + m_map_level = 0; +} + +void D3D12Texture::CommitClear() +{ + if (m_state != GPUTexture::State::Cleared) + return; + + D3D12Device& dev = D3D12Device::GetInstance(); + dev.EndRenderPass(); + + ActuallyCommitClear(dev.GetCommandList()); +} + +void D3D12Texture::CommitClear(ID3D12GraphicsCommandList* cmdlist) +{ + if (m_state != GPUTexture::State::Cleared) + return; + + ActuallyCommitClear(cmdlist); +} + +void D3D12Texture::ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist) +{ + if (IsDepthStencil()) + { + TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE); + 
cmdlist->ClearDepthStencilView(GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, m_clear_value.depth, 0, 0, nullptr); + } + else + { + TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); + cmdlist->ClearRenderTargetView(GetWriteDescriptor(), D3D12Device::RGBA8ToFloat(m_clear_value.color).data(), 0, + nullptr); + } + + SetState(State::Dirty); +} + +void D3D12Texture::SetDebugName(const std::string_view& name) +{ + D3D12::SetObjectName(m_resource.Get(), name); +} + +u32 D3D12Texture::CalculateSubresource(u32 layer, u32 level, u32 num_levels) +{ + // D3D11CalcSubresource + return level + layer * num_levels; +} + +u32 D3D12Texture::CalculateSubresource(u32 layer, u32 level) const +{ + return CalculateSubresource(layer, level, m_levels); +} + +void D3D12Texture::TransitionToState(D3D12_RESOURCE_STATES state) +{ + TransitionToState(D3D12Device::GetInstance().GetCommandList(), state); +} + +void D3D12Texture::TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state) +{ + if (m_resource_state == state) + return; + + const D3D12_RESOURCE_STATES prev_state = m_resource_state; + m_resource_state = state; + + const D3D12_RESOURCE_BARRIER barrier = { + D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + D3D12_RESOURCE_BARRIER_FLAG_NONE, + {{m_resource.Get(), D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, prev_state, state}}}; + cmdlist->ResourceBarrier(1, &barrier); +} + +void D3D12Texture::TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, u32 layer, u32 level, + D3D12_RESOURCE_STATES before_state, + D3D12_RESOURCE_STATES after_state) const +{ + TransitionSubresourceToState(cmdlist, m_resource.Get(), CalculateSubresource(layer, level), before_state, + after_state); +} + +void D3D12Texture::TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, u32 subresource, + D3D12_RESOURCE_STATES before_state, + D3D12_RESOURCE_STATES after_state) const +{ + TransitionSubresourceToState(cmdlist, m_resource.Get(), subresource, before_state, 
after_state); +} + +void D3D12Texture::TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, ID3D12Resource* resource, + u32 subresource, D3D12_RESOURCE_STATES before_state, + D3D12_RESOURCE_STATES after_state) +{ + const D3D12_RESOURCE_BARRIER barrier = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + D3D12_RESOURCE_BARRIER_FLAG_NONE, + {{resource, subresource, before_state, after_state}}}; + cmdlist->ResourceBarrier(1, &barrier); +} + +void D3D12Texture::MakeReadyForSampling() +{ + if (m_resource_state == D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + return; + + D3D12Device& dev = D3D12Device::GetInstance(); + if (dev.InRenderPass()) + dev.EndRenderPass(); + + TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); +} + +bool D3D12Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) +{ + D3D12Texture* T = static_cast(texture); + T->CommitClear(); + + const u32 pitch = Common::AlignUp(width * T->GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 size = pitch * height; + const u32 subresource = 0; + if (!CheckDownloadBufferSize(size)) + { + Log_ErrorPrintf("Can't read back %ux%u", width, height); + return false; + } + + if (InRenderPass()) + EndRenderPass(); + + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + D3D12_TEXTURE_COPY_LOCATION srcloc; + srcloc.pResource = T->GetResource(); + srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcloc.SubresourceIndex = subresource; + + D3D12_TEXTURE_COPY_LOCATION dstloc; + dstloc.pResource = m_download_buffer.Get(); + dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstloc.PlacedFootprint.Offset = 0; + dstloc.PlacedFootprint.Footprint.Format = T->GetDXGIFormat(); + dstloc.PlacedFootprint.Footprint.Width = width; + dstloc.PlacedFootprint.Footprint.Height = height; + dstloc.PlacedFootprint.Footprint.Depth = 1; + dstloc.PlacedFootprint.Footprint.RowPitch = pitch; + + const D3D12_RESOURCE_STATES old_layout 
= T->GetResourceState(); + if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) + T->TransitionSubresourceToState(cmdlist, subresource, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE); + + // TODO: Rules for depth buffers here? + const D3D12_BOX srcbox{static_cast(x), static_cast(y), 0u, + static_cast(x + width), static_cast(y + height), 1u}; + cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox); + + if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) + T->TransitionSubresourceToState(cmdlist, subresource, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout); + + SubmitCommandList(true); + + u8* map_pointer; + const D3D12_RANGE read_range{0u, size}; + const HRESULT hr = m_download_buffer->Map(0, &read_range, reinterpret_cast(const_cast(&map_pointer))); + if (FAILED(hr)) + { + Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); + return false; + } + + StringUtil::StrideMemCpy(out_data, out_data_stride, map_pointer, pitch, width * T->GetPixelSize(), height); + m_download_buffer->Unmap(0, nullptr); + return true; +} + +bool D3D12Device::CheckDownloadBufferSize(u32 required_size) +{ + if (m_download_buffer_size >= required_size) + return true; + + DestroyDownloadBuffer(); + + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK; + + const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + required_size, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + + HRESULT hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + m_download_buffer_allocation.ReleaseAndGetAddressOf(), + IID_PPV_ARGS(m_download_buffer.ReleaseAndGetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateResource() failed with HRESULT %08X", hr); + return false; + } + + return true; +} + +void D3D12Device::DestroyDownloadBuffer() +{ + if (!m_download_buffer) + return; + + m_download_buffer.Reset(); + 
m_download_buffer_allocation.Reset(); + m_download_buffer_size = 0; +} + +D3D12Sampler::D3D12Sampler(D3D12DescriptorHandle descriptor) : m_descriptor(descriptor) +{ +} + +D3D12Sampler::~D3D12Sampler() +{ + // Cleaned up by main class. +} + +void D3D12Sampler::SetDebugName(const std::string_view& name) +{ +} + +D3D12DescriptorHandle D3D12Device::GetSampler(const GPUSampler::Config& config) +{ + const auto it = m_sampler_map.find(config.key); + if (it != m_sampler_map.end()) + return it->second; + + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + D3D12_TEXTURE_ADDRESS_MODE_WRAP, // Repeat + D3D12_TEXTURE_ADDRESS_MODE_CLAMP, // ClampToEdge + D3D12_TEXTURE_ADDRESS_MODE_BORDER, // ClampToBorder + }}; + + static constexpr u8 filter_count = static_cast(GPUSampler::Filter::MaxCount); + static constexpr D3D12_FILTER filters[filter_count][filter_count][filter_count] = { + { + {D3D12_FILTER_MIN_MAG_MIP_POINT, D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT}, + {D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT, D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT}, + }, + { + {D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR, D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR}, + {D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, D3D12_FILTER_MIN_MAG_MIP_LINEAR}, + }}; + + D3D12_SAMPLER_DESC desc = {}; + desc.AddressU = ta[static_cast(config.address_u.GetValue())]; + desc.AddressV = ta[static_cast(config.address_v.GetValue())]; + desc.AddressW = ta[static_cast(config.address_w.GetValue())]; + std::memcpy(desc.BorderColor, RGBA8ToFloat(config.border_color).data(), sizeof(desc.BorderColor)); + desc.MinLOD = static_cast(config.min_lod); + desc.MaxLOD = static_cast(config.max_lod); + + if (config.anisotropy > 0) + { + desc.Filter = D3D12_FILTER_ANISOTROPIC; + desc.MaxAnisotropy = config.anisotropy; + } + else + { + desc.Filter = filters[static_cast(config.mip_filter.GetValue())][static_cast(config.min_filter.GetValue())] + [static_cast(config.mag_filter.GetValue())]; + desc.MaxAnisotropy = 1; + } + + 
D3D12DescriptorHandle handle; + if (m_sampler_heap_manager.Allocate(&handle)) + m_device->CreateSampler(&desc, handle); + + m_sampler_map.emplace(config.key, handle); + return handle; +} + +void D3D12Device::DestroySamplers() +{ + for (auto& it : m_sampler_map) + { + if (it.second) + m_sampler_heap_manager.Free(&it.second); + } + m_sampler_map.clear(); +} + +std::unique_ptr D3D12Device::CreateSampler(const GPUSampler::Config& config) +{ + const D3D12DescriptorHandle handle = GetSampler(config); + if (!handle) + return {}; + + return std::unique_ptr(new D3D12Sampler(std::move(handle))); +} + +D3D12Framebuffer::D3D12Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, D3D12DescriptorHandle rtv, + D3D12DescriptorHandle dsv) + : GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv)) +{ +} + +D3D12Framebuffer::~D3D12Framebuffer() +{ + D3D12Device& dev = D3D12Device::GetInstance(); + if (m_rtv) + D3D12Device::GetInstance().DeferDescriptorDestruction(dev.GetRTVHeapManager(), &m_rtv); + if (m_dsv) + D3D12Device::GetInstance().DeferDescriptorDestruction(dev.GetDSVHeapManager(), &m_dsv); +} + +void D3D12Framebuffer::SetDebugName(const std::string_view& name) +{ +} + +std::unique_ptr D3D12Device::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /*= nullptr*/) +{ + DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); + D3D12Texture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); + D3D12Texture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); + + const u32 width = RT ? RT->GetWidth() : DS->GetWidth(); + const u32 height = RT ? 
RT->GetHeight() : DS->GetHeight(); + + D3D12DescriptorHandle rtv, dsv; + if (RT) + rtv = RT->GetWriteDescriptor(); + if (DS) + dsv = DS->GetWriteDescriptor(); + + return std::unique_ptr(new D3D12Framebuffer(RT, DS, width, height, std::move(rtv), std::move(dsv))); +} + +D3D12TextureBuffer::D3D12TextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) +{ +} + +D3D12TextureBuffer::~D3D12TextureBuffer() +{ + Destroy(true); +} + +bool D3D12TextureBuffer::Create(D3D12Device& dev) +{ + static constexpr std::array(GPUTextureBuffer::Format::MaxCount)> format_mapping = {{ + DXGI_FORMAT_R16_UINT, // R16UI + }}; + + if (!m_buffer.Create(GetSizeInBytes())) + return false; + + if (!dev.GetDescriptorHeapManager().Allocate(&m_descriptor)) + return {}; + + D3D12_SHADER_RESOURCE_VIEW_DESC desc = {format_mapping[static_cast(m_format)], D3D12_SRV_DIMENSION_BUFFER, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING}; + desc.Buffer.NumElements = m_size_in_elements; + dev.GetDevice()->CreateShaderResourceView(m_buffer.GetBuffer(), &desc, m_descriptor); + return true; +} + +void D3D12TextureBuffer::Destroy(bool defer) +{ + D3D12Device& dev = D3D12Device::GetInstance(); + if (m_descriptor) + { + if (defer) + dev.DeferDescriptorDestruction(dev.GetDescriptorHeapManager(), &m_descriptor); + else + dev.GetDescriptorHeapManager().Free(&m_descriptor); + } +} + +void* D3D12TextureBuffer::Map(u32 required_elements) +{ + const u32 esize = GetElementSize(m_format); + const u32 req_size = esize * required_elements; + if (!m_buffer.ReserveMemory(req_size, esize)) + { + D3D12Device::GetInstance().SubmitCommandListAndRestartRenderPass("out of space in texture buffer"); + if (!m_buffer.ReserveMemory(req_size, esize)) + Panic("Failed to allocate texture buffer space."); + } + + m_current_position = m_buffer.GetCurrentOffset() / esize; + return m_buffer.GetCurrentHostPointer(); +} + +void D3D12TextureBuffer::Unmap(u32 used_elements) +{ + 
m_buffer.CommitMemory(GetElementSize(m_format) * used_elements); +} + +void D3D12TextureBuffer::SetDebugName(const std::string_view& name) +{ + D3D12::SetObjectName(m_buffer.GetBuffer(), name); +} + +std::unique_ptr D3D12Device::CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) +{ + + std::unique_ptr tb = std::make_unique(format, size_in_elements); + if (!tb->Create(*this)) + tb.reset(); + + return tb; +} diff --git a/src/util/d3d12_texture.h b/src/util/d3d12_texture.h new file mode 100644 index 000000000..a5183117f --- /dev/null +++ b/src/util/d3d12_texture.h @@ -0,0 +1,169 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "d3d12_descriptor_heap_manager.h" +#include "d3d12_stream_buffer.h" +#include "gpu_device.h" +#include "gpu_texture.h" + +#include +#include +#include + +namespace D3D12MA { +class Allocation; +} + +class D3D12Device; + +class D3D12Texture final : public GPUTexture +{ + friend D3D12Device; + +public: + template + using ComPtr = Microsoft::WRL::ComPtr; + + ~D3D12Texture() override; + + void Destroy(bool defer); + + ALWAYS_INLINE const D3D12DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } + ALWAYS_INLINE const D3D12DescriptorHandle& GetWriteDescriptor() const { return m_write_descriptor; } + ALWAYS_INLINE const D3D12DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; } + ALWAYS_INLINE D3D12_RESOURCE_STATES GetResourceState() const { return m_resource_state; } + ALWAYS_INLINE DXGI_FORMAT GetDXGIFormat() const { return m_dxgi_format; } + ALWAYS_INLINE ID3D12Resource* GetResource() const { return m_resource.Get(); } + + bool IsValid() const override { return static_cast(m_resource); } + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; + bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 
0, u32 level = 0) override; + void Unmap() override; + void MakeReadyForSampling() override; + + void SetDebugName(const std::string_view& name) override; + + void TransitionToState(D3D12_RESOURCE_STATES state); + void CommitClear(); + void CommitClear(ID3D12GraphicsCommandList* cmdlist); + + static u32 CalculateSubresource(u32 layer, u32 level, u32 num_levels); + u32 CalculateSubresource(u32 layer, u32 level) const; + + void TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state); + void TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, u32 layer, u32 level, + D3D12_RESOURCE_STATES before_state, D3D12_RESOURCE_STATES after_state) const; + void TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, u32 subresource, + D3D12_RESOURCE_STATES before_state, D3D12_RESOURCE_STATES after_state) const; + static void TransitionSubresourceToState(ID3D12GraphicsCommandList* cmdlist, ID3D12Resource* resource, + u32 subresource, D3D12_RESOURCE_STATES before_state, + D3D12_RESOURCE_STATES after_state); + + // Call when the texture is bound to the pipeline, or read from in a copy. 
+ ALWAYS_INLINE void SetUseFenceValue(u64 counter) { m_use_fence_counter = counter; } + +private: + enum class WriteDescriptorType : u8 + { + None, + RTV, + DSV + }; + + D3D12Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + DXGI_FORMAT dxgi_format, ComPtr resource, ComPtr allocation, + const D3D12DescriptorHandle& srv_descriptor, const D3D12DescriptorHandle& write_descriptor, + const D3D12DescriptorHandle& uav_descriptor, WriteDescriptorType wdtype, + D3D12_RESOURCE_STATES resource_state); + + ID3D12GraphicsCommandList4* GetCommandBufferForUpdate(); + ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, + u32 height) const; + void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; + void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist); + + ComPtr m_resource; + ComPtr m_allocation; + + D3D12DescriptorHandle m_srv_descriptor = {}; + D3D12DescriptorHandle m_write_descriptor = {}; + D3D12DescriptorHandle m_uav_descriptor = {}; + + DXGI_FORMAT m_dxgi_format = DXGI_FORMAT_UNKNOWN; + D3D12_RESOURCE_STATES m_resource_state = D3D12_RESOURCE_STATE_COMMON; + WriteDescriptorType m_write_descriptor_type = WriteDescriptorType::None; + + // Contains the fence counter when the texture was last used. + // When this matches the current fence counter, the texture was used this command buffer. 
+ u64 m_use_fence_counter = 0; + + u16 m_map_x = 0; + u16 m_map_y = 0; + u16 m_map_width = 0; + u16 m_map_height = 0; + u8 m_map_layer = 0; + u8 m_map_level = 0; +}; + +class D3D12Sampler final : public GPUSampler +{ + friend D3D12Device; + +public: + ~D3D12Sampler() override; + + ALWAYS_INLINE const D3D12DescriptorHandle& GetDescriptor() const { return m_descriptor; } + + void SetDebugName(const std::string_view& name) override; + +private: + D3D12Sampler(D3D12DescriptorHandle descriptor); + + D3D12DescriptorHandle m_descriptor; +}; + +class D3D12Framebuffer final : public GPUFramebuffer +{ + friend D3D12Device; + +public: + ~D3D12Framebuffer() override; + + ALWAYS_INLINE const D3D12DescriptorHandle& GetRTV() const { return m_rtv; } + ALWAYS_INLINE const D3D12DescriptorHandle& GetDSV() const { return m_dsv; } + + void SetDebugName(const std::string_view& name) override; + +private: + D3D12Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, D3D12DescriptorHandle rtv, + D3D12DescriptorHandle dsv); + + D3D12DescriptorHandle m_rtv; + D3D12DescriptorHandle m_dsv; +}; + +class D3D12TextureBuffer final : public GPUTextureBuffer +{ + friend D3D12Device; + +public: + D3D12TextureBuffer(Format format, u32 size_in_elements); + ~D3D12TextureBuffer() override; + + ALWAYS_INLINE const D3D12DescriptorHandle& GetDescriptor() const { return m_descriptor; } + + bool Create(D3D12Device& dev); + void Destroy(bool defer); + + // Inherited via GPUTextureBuffer + void* Map(u32 required_elements) override; + void Unmap(u32 used_elements) override; + + void SetDebugName(const std::string_view& name) override; + +private: + D3D12StreamBuffer m_buffer; + D3D12DescriptorHandle m_descriptor; +}; diff --git a/src/util/d3d_common.cpp b/src/util/d3d_common.cpp new file mode 100644 index 000000000..e35a91057 --- /dev/null +++ b/src/util/d3d_common.cpp @@ -0,0 +1,469 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 
+ +#include "d3d_common.h" + +#include "common/assert.h" +#include "common/file_system.h" +#include "common/log.h" +#include "common/rectangle.h" +#include "common/string_util.h" + +#include "fmt/format.h" + +#include +#include + +Log_SetChannel(D3DCommon); + +static unsigned s_next_bad_shader_id = 1; + +const char* D3DCommon::GetFeatureLevelString(D3D_FEATURE_LEVEL feature_level) +{ + static constexpr std::array, 4> feature_level_names = {{ + {D3D_FEATURE_LEVEL_10_0, "D3D_FEATURE_LEVEL_10_0"}, + {D3D_FEATURE_LEVEL_10_1, "D3D_FEATURE_LEVEL_10_1"}, + {D3D_FEATURE_LEVEL_11_0, "D3D_FEATURE_LEVEL_11_0"}, + {D3D_FEATURE_LEVEL_11_1, "D3D_FEATURE_LEVEL_11_1"}, + }}; + + for (const auto& [fl, name] : feature_level_names) + { + if (fl == feature_level) + return name; + } + + return "D3D_FEATURE_LEVEL_UNKNOWN"; +} + +const char* D3DCommon::GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL feature_level) +{ + static constexpr std::array, 4> feature_level_names = {{ + {D3D_FEATURE_LEVEL_10_0, "sm40"}, + {D3D_FEATURE_LEVEL_10_1, "sm41"}, + {D3D_FEATURE_LEVEL_11_0, "sm50"}, + {D3D_FEATURE_LEVEL_11_1, "sm51"}, + }}; + + for (const auto& [fl, name] : feature_level_names) + { + if (fl == feature_level) + return name; + } + + return "unk"; +} + +Microsoft::WRL::ComPtr D3DCommon::CreateFactory(bool debug) +{ + UINT flags = 0; + if (debug) + flags |= DXGI_CREATE_FACTORY_DEBUG; + + Microsoft::WRL::ComPtr factory; + const HRESULT hr = CreateDXGIFactory2(flags, IID_PPV_ARGS(factory.GetAddressOf())); + if (FAILED(hr)) + Log_ErrorPrintf("Failed to create DXGI factory: %08X", hr); + + return factory; +} + +static std::string FixupDuplicateAdapterNames(const std::vector& adapter_names, std::string adapter_name) +{ + if (std::any_of(adapter_names.begin(), adapter_names.end(), + [&adapter_name](const std::string& other) { return (adapter_name == other); })) + { + std::string original_adapter_name = std::move(adapter_name); + + u32 current_extra = 2; + do + { + adapter_name = fmt::format("{} 
({})", original_adapter_name.c_str(), current_extra); + current_extra++; + } while (std::any_of(adapter_names.begin(), adapter_names.end(), + [&adapter_name](const std::string& other) { return (adapter_name == other); })); + } + + return adapter_name; +} + +std::vector D3DCommon::GetAdapterNames(IDXGIFactory5* factory) +{ + std::vector adapter_names; + + Microsoft::WRL::ComPtr adapter; + for (u32 index = 0;; index++) + { + const HRESULT hr = factory->EnumAdapters1(index, adapter.ReleaseAndGetAddressOf()); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + + if (FAILED(hr)) + { + Log_ErrorPrintf("IDXGIFactory2::EnumAdapters() returned %08X", hr); + continue; + } + + adapter_names.push_back(FixupDuplicateAdapterNames(adapter_names, GetAdapterName(adapter.Get()))); + } + + return adapter_names; +} + +std::vector D3DCommon::GetFullscreenModes(IDXGIFactory5* factory, const std::string_view& adapter_name) +{ + std::vector modes; + HRESULT hr; + + Microsoft::WRL::ComPtr adapter = GetChosenOrFirstAdapter(factory, adapter_name); + if (!adapter) + return modes; + + Microsoft::WRL::ComPtr output; + if (FAILED(hr = adapter->EnumOutputs(0, &output))) + { + Log_ErrorPrintf("EnumOutputs() failed: %08X", hr); + return modes; + } + + UINT num_modes = 0; + if (FAILED(hr = output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, nullptr))) + { + Log_ErrorPrintf("GetDisplayModeList() failed: %08X", hr); + return modes; + } + + std::vector dmodes(num_modes); + if (FAILED(hr = output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, dmodes.data()))) + { + Log_ErrorPrintf("GetDisplayModeList() (2) failed: %08X", hr); + return modes; + } + + for (const DXGI_MODE_DESC& mode : dmodes) + { + modes.push_back(GPUDevice::GetFullscreenModeString(mode.Width, mode.Height, + static_cast(mode.RefreshRate.Numerator) / + static_cast(mode.RefreshRate.Denominator))); + } + + return modes; +} + +bool D3DCommon::GetRequestedExclusiveFullscreenModeDesc(IDXGIFactory5* factory, const 
RECT& window_rect, u32 width, + u32 height, float refresh_rate, DXGI_FORMAT format, + DXGI_MODE_DESC* fullscreen_mode, IDXGIOutput** output) +{ + // We need to find which monitor the window is located on. + const Common::Rectangle client_rc_vec(window_rect.left, window_rect.top, window_rect.right, window_rect.bottom); + + // The window might be on a different adapter to which we are rendering.. so we have to enumerate them all. + HRESULT hr; + Microsoft::WRL::ComPtr first_output, intersecting_output; + + for (u32 adapter_index = 0; !intersecting_output; adapter_index++) + { + Microsoft::WRL::ComPtr adapter; + hr = factory->EnumAdapters1(adapter_index, adapter.GetAddressOf()); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + else if (FAILED(hr)) + continue; + + for (u32 output_index = 0;; output_index++) + { + Microsoft::WRL::ComPtr this_output; + DXGI_OUTPUT_DESC output_desc; + hr = adapter->EnumOutputs(output_index, this_output.GetAddressOf()); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + else if (FAILED(hr) || FAILED(this_output->GetDesc(&output_desc))) + continue; + + const Common::Rectangle output_rc(output_desc.DesktopCoordinates.left, output_desc.DesktopCoordinates.top, + output_desc.DesktopCoordinates.right, + output_desc.DesktopCoordinates.bottom); + if (!client_rc_vec.Intersects(output_rc)) + { + intersecting_output = std::move(this_output); + break; + } + + // Fallback to the first monitor. + if (!first_output) + first_output = std::move(this_output); + } + } + + if (!intersecting_output) + { + if (!first_output) + { + Log_ErrorPrintf("No DXGI output found. 
Can't use exclusive fullscreen."); + return false; + } + + Log_WarningPrint("No DXGI output found for window, using first."); + intersecting_output = std::move(first_output); + } + + DXGI_MODE_DESC request_mode = {}; + request_mode.Width = width; + request_mode.Height = height; + request_mode.Format = format; + request_mode.RefreshRate.Numerator = static_cast(std::floor(refresh_rate * 1000.0f)); + request_mode.RefreshRate.Denominator = 1000u; + + if (FAILED(hr = intersecting_output->FindClosestMatchingMode(&request_mode, fullscreen_mode, nullptr)) || + request_mode.Format != format) + { + Log_ErrorPrintf("Failed to find closest matching mode, hr=%08X", hr); + return false; + } + + *output = intersecting_output.Get(); + intersecting_output->AddRef(); + return true; +} + +Microsoft::WRL::ComPtr D3DCommon::GetAdapterByName(IDXGIFactory5* factory, const std::string_view& name) +{ + if (name.empty()) + return {}; + + // This might seem a bit odd to cache the names.. but there's a method to the madness. + // We might have two GPUs with the same name... 
:) + std::vector adapter_names; + + Microsoft::WRL::ComPtr adapter; + for (u32 index = 0;; index++) + { + const HRESULT hr = factory->EnumAdapters1(index, adapter.ReleaseAndGetAddressOf()); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + + if (FAILED(hr)) + { + Log_ErrorPrintf("IDXGIFactory2::EnumAdapters() returned %08X"); + continue; + } + + std::string adapter_name = FixupDuplicateAdapterNames(adapter_names, GetAdapterName(adapter.Get())); + if (adapter_name == name) + { + Log_VerbosePrintf("Found adapter '%s'", adapter_name.c_str()); + return adapter; + } + + adapter_names.push_back(std::move(adapter_name)); + } + + Log_ErrorPrintf(fmt::format("Adapter '{}' not found.", name).c_str()); + return {}; +} + +Microsoft::WRL::ComPtr D3DCommon::GetFirstAdapter(IDXGIFactory5* factory) +{ + Microsoft::WRL::ComPtr adapter; + HRESULT hr = factory->EnumAdapters1(0, adapter.GetAddressOf()); + if (FAILED(hr)) + Log_ErrorPrintf("IDXGIFactory2::EnumAdapters() for first adapter returned %08X", hr); + + return adapter; +} + +Microsoft::WRL::ComPtr D3DCommon::GetChosenOrFirstAdapter(IDXGIFactory5* factory, + const std::string_view& name) +{ + Microsoft::WRL::ComPtr adapter = GetAdapterByName(factory, name); + if (!adapter) + adapter = GetFirstAdapter(factory); + + return adapter; +} + +std::string D3DCommon::GetAdapterName(IDXGIAdapter1* adapter) +{ + std::string ret; + + DXGI_ADAPTER_DESC1 desc; + HRESULT hr = adapter->GetDesc1(&desc); + if (SUCCEEDED(hr)) + { + ret = StringUtil::WideStringToUTF8String(desc.Description); + } + else + { + Log_ErrorPrintf("IDXGIAdapter1::GetDesc() returned %08X", hr); + } + + if (ret.empty()) + ret = "(Unknown)"; + + return ret; +} + +std::string D3DCommon::GetDriverVersionFromLUID(const LUID& luid) +{ + std::string ret; + + HKEY hKey; + if (RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\DirectX", 0, KEY_READ, &hKey) == ERROR_SUCCESS) + { + DWORD max_key_len = 0, adapter_count = 0; + if (RegQueryInfoKeyW(hKey, nullptr, nullptr, nullptr, 
&adapter_count, &max_key_len, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr) == ERROR_SUCCESS) + { + std::vector current_name(max_key_len + 1); + for (DWORD i = 0; i < adapter_count; ++i) + { + DWORD subKeyLength = static_cast(current_name.size()); + if (RegEnumKeyExW(hKey, i, current_name.data(), &subKeyLength, nullptr, nullptr, nullptr, nullptr) == + ERROR_SUCCESS) + { + LUID current_luid = {}; + DWORD current_luid_size = sizeof(uint64_t); + if (RegGetValueW(hKey, current_name.data(), L"AdapterLuid", RRF_RT_QWORD, nullptr, ¤t_luid, + ¤t_luid_size) == ERROR_SUCCESS && + current_luid.HighPart == luid.HighPart && current_luid.LowPart == luid.LowPart) + { + LARGE_INTEGER driver_version = {}; + DWORD driver_version_size = sizeof(driver_version); + if (RegGetValueW(hKey, current_name.data(), L"DriverVersion", RRF_RT_QWORD, nullptr, &driver_version, + &driver_version_size) == ERROR_SUCCESS) + { + WORD nProduct = HIWORD(driver_version.HighPart); + WORD nVersion = LOWORD(driver_version.HighPart); + WORD nSubVersion = HIWORD(driver_version.LowPart); + WORD nBuild = LOWORD(driver_version.LowPart); + ret = fmt::format("{}.{}.{}.{}", nProduct, nVersion, nSubVersion, nBuild); + } + } + } + } + } + + RegCloseKey(hKey); + } + + return ret; +} + +std::optional> D3DCommon::CompileShader(D3D_FEATURE_LEVEL feature_level, bool debug_device, + GPUShaderStage stage, const std::string_view& source, + const char* entry_point) +{ + const char* target; + switch (feature_level) + { + case D3D_FEATURE_LEVEL_10_0: + { + static constexpr std::array targets = {{"vs_4_0", "ps_4_0", "cs_4_0"}}; + target = targets[static_cast(stage)]; + } + break; + + case D3D_FEATURE_LEVEL_10_1: + { + static constexpr std::array targets = {{"vs_4_1", "ps_4_1", "cs_4_1"}}; + target = targets[static_cast(stage)]; + } + break; + + case D3D_FEATURE_LEVEL_11_0: + { + static constexpr std::array targets = {{"vs_5_0", "ps_5_0", "cs_5_0"}}; + target = targets[static_cast(stage)]; + } + break; + + case 
D3D_FEATURE_LEVEL_11_1: + default: + { + static constexpr std::array targets = {{"vs_5_1", "ps_5_1", "cs_5_1"}}; + target = targets[static_cast(stage)]; + } + break; + } + + static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3; + static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG; + + Microsoft::WRL::ComPtr blob; + Microsoft::WRL::ComPtr error_blob; + const HRESULT hr = + D3DCompile(source.data(), source.size(), "0", nullptr, nullptr, entry_point, target, + debug_device ? flags_debug : flags_non_debug, 0, blob.GetAddressOf(), error_blob.GetAddressOf()); + + std::string error_string; + if (error_blob) + { + error_string.append(static_cast(error_blob->GetBufferPointer()), error_blob->GetBufferSize()); + error_blob.Reset(); + } + + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to compile '%s':\n%s", target, error_string.c_str()); + + auto fp = FileSystem::OpenManagedCFile( + GPUDevice::GetShaderDumpPath(fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); + if (fp) + { + std::fwrite(source.data(), source.size(), 1, fp.get()); + std::fprintf(fp.get(), "\n\nCompile as %s failed: %08X\n", target, hr); + std::fwrite(error_string.c_str(), error_string.size(), 1, fp.get()); + } + + return {}; + } + + if (!error_string.empty()) + Log_WarningPrintf("'%s' compiled with warnings:\n%s", target, error_string.c_str()); + + return DynamicHeapArray(static_cast(blob->GetBufferPointer()), blob->GetBufferSize()); +} + +static constexpr std::array(GPUTexture::Format::MaxCount)> + s_format_mapping = {{ + // clang-format off + // d3d_format srv_format rtv_format dsv_format + {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // Unknown + {DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN }, // RGBA8 + {DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_UNKNOWN }, // BGRA8 + 
{DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB565 + {DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_UNKNOWN }, // RGBA5551 + {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_UNKNOWN }, // R8 + {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_D16_UNORM }, // D16 + {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_UNKNOWN }, // R16 + {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_UNKNOWN }, // R16F + {DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_UNKNOWN }, // R32I + {DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_UNKNOWN }, // R32U + {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN }, // R32F + {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_UNKNOWN }, // RG8 + {DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_UNKNOWN }, // RG16 + {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RG16F + {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RG32F + {DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_UNKNOWN }, // RGBA16 + {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F + {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F + {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2 + // clang-format on + }}; + +const D3DCommon::DXGIFormatMapping& 
D3DCommon::GetFormatMapping(GPUTexture::Format format) +{ + DebugAssert(static_cast(format) < s_format_mapping.size()); + return s_format_mapping[static_cast(format)]; +} + +GPUTexture::Format D3DCommon::GetFormatForDXGIFormat(DXGI_FORMAT format) +{ + for (u32 i = 0; i < static_cast(GPUTexture::Format::MaxCount); i++) + { + if (s_format_mapping[i].resource_format == format) + return static_cast(i); + } + + return GPUTexture::Format::Unknown; +} diff --git a/src/util/d3d_common.h b/src/util/d3d_common.h new file mode 100644 index 000000000..1390a7d76 --- /dev/null +++ b/src/util/d3d_common.h @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" + +#include "common/heap_array.h" +#include "common/types.h" +#include "common/windows_headers.h" + +#include +#include +#include +#include +#include +#include + +struct IDXGIFactory5; +struct IDXGIAdapter1; +struct IDXGIOutput; +struct DXGI_MODE_DESC; + +namespace D3DCommon { +// returns string representation of feature level +const char* GetFeatureLevelString(D3D_FEATURE_LEVEL feature_level); +const char* GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL feature_level); + +// create a dxgi factory +Microsoft::WRL::ComPtr CreateFactory(bool debug); + +// returns a list of all adapter names +std::vector GetAdapterNames(IDXGIFactory5* factory); + +// returns a list of fullscreen modes for the specified adapter +std::vector GetFullscreenModes(IDXGIFactory5* factory, const std::string_view& adapter_name); + +// returns the fullscreen mode to use for the specified dimensions +bool GetRequestedExclusiveFullscreenModeDesc(IDXGIFactory5* factory, const RECT& window_rect, u32 width, u32 height, + float refresh_rate, DXGI_FORMAT format, DXGI_MODE_DESC* fullscreen_mode, + IDXGIOutput** output); + +// get an adapter based on name +Microsoft::WRL::ComPtr GetAdapterByName(IDXGIFactory5* factory, const std::string_view& 
name); + +// returns the first adapter in the system +Microsoft::WRL::ComPtr GetFirstAdapter(IDXGIFactory5* factory); + +// returns the adapter specified in the configuration, or the default +Microsoft::WRL::ComPtr GetChosenOrFirstAdapter(IDXGIFactory5* factory, const std::string_view& name); + +// returns a utf-8 string of the specified adapter's name +std::string GetAdapterName(IDXGIAdapter1* adapter); + +// returns the driver version from the registry as a string +std::string GetDriverVersionFromLUID(const LUID& luid); + +std::optional> CompileShader(D3D_FEATURE_LEVEL feature_level, bool debug_device, + GPUShaderStage stage, const std::string_view& source, + const char* entry_point); + +struct DXGIFormatMapping +{ + DXGI_FORMAT resource_format; + DXGI_FORMAT srv_format; + DXGI_FORMAT rtv_format; + DXGI_FORMAT dsv_format; +}; +const DXGIFormatMapping& GetFormatMapping(GPUTexture::Format format); +GPUTexture::Format GetFormatForDXGIFormat(DXGI_FORMAT format); +} // namespace D3DCommon diff --git a/src/util/display_ps.hlsl b/src/util/display_ps.hlsl deleted file mode 100644 index f1d84f095..000000000 --- a/src/util/display_ps.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -Texture2D samp0 : register(t0); -SamplerState samp0_ss : register(s0); - -void main(in float2 v_tex0 : TEXCOORD0, - out float4 o_col0 : SV_Target) -{ -#ifdef ALPHA - o_col0 = samp0.Sample(samp0_ss, v_tex0); -#else - o_col0 = float4(samp0.Sample(samp0_ss, v_tex0).rgb, 1.0); -#endif -} \ No newline at end of file diff --git a/src/util/display_ps.hlsl.h b/src/util/display_ps.hlsl.h deleted file mode 100644 index f2cbf5355..000000000 --- a/src/util/display_ps.hlsl.h +++ /dev/null @@ -1,142 +0,0 @@ -#if 0 -// -// Generated by Microsoft (R) HLSL Shader Compiler 10.1 -// -// -// Resource Bindings: -// -// Name Type Format Dim HLSL Bind Count -// ------------------------------ ---------- ------- ----------- -------------- ------ -// samp0_ss sampler NA NA s0 1 -// samp0 texture float4 2d t0 1 -// -// -// -// Input 
signature: -// -// Name Index Mask Register SysValue Format Used -// -------------------- ----- ------ -------- -------- ------- ------ -// TEXCOORD 0 xy 0 NONE float xy -// -// -// Output signature: -// -// Name Index Mask Register SysValue Format Used -// -------------------- ----- ------ -------- -------- ------- ------ -// SV_Target 0 xyzw 0 TARGET float xyzw -// -ps_4_0 -dcl_sampler s0, mode_default -dcl_resource_texture2d (float,float,float,float) t0 -dcl_input_ps linear v0.xy -dcl_output o0.xyzw -dcl_temps 1 -sample r0.xyzw, v0.xyxx, t0.xyzw, s0 -mov o0.xyz, r0.xyzx -mov o0.w, l(1.000000) -ret -// Approximately 4 instruction slots used -#endif - -const BYTE static s_display_ps_bytecode[] = -{ - 68, 88, 66, 67, 192, 215, - 150, 96, 210, 93, 209, 128, - 113, 254, 100, 56, 49, 113, - 128, 72, 1, 0, 0, 0, - 80, 2, 0, 0, 5, 0, - 0, 0, 52, 0, 0, 0, - 208, 0, 0, 0, 4, 1, - 0, 0, 56, 1, 0, 0, - 212, 1, 0, 0, 82, 68, - 69, 70, 148, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, - 28, 0, 0, 0, 0, 4, - 255, 255, 0, 129, 0, 0, - 107, 0, 0, 0, 92, 0, - 0, 0, 3, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 101, 0, 0, 0, 2, 0, - 0, 0, 5, 0, 0, 0, - 4, 0, 0, 0, 255, 255, - 255, 255, 0, 0, 0, 0, - 1, 0, 0, 0, 13, 0, - 0, 0, 115, 97, 109, 112, - 48, 95, 115, 115, 0, 115, - 97, 109, 112, 48, 0, 77, - 105, 99, 114, 111, 115, 111, - 102, 116, 32, 40, 82, 41, - 32, 72, 76, 83, 76, 32, - 83, 104, 97, 100, 101, 114, - 32, 67, 111, 109, 112, 105, - 108, 101, 114, 32, 49, 48, - 46, 49, 0, 171, 73, 83, - 71, 78, 44, 0, 0, 0, - 1, 0, 0, 0, 8, 0, - 0, 0, 32, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, - 0, 0, 0, 0, 3, 3, - 0, 0, 84, 69, 88, 67, - 79, 79, 82, 68, 0, 171, - 171, 171, 79, 83, 71, 78, - 44, 0, 0, 0, 1, 0, - 0, 0, 8, 0, 0, 0, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, - 0, 0, 15, 0, 0, 0, - 83, 86, 95, 84, 97, 114, - 103, 101, 116, 0, 171, 171, - 83, 72, 68, 82, 148, 0, - 0, 0, 64, 0, 0, 0, - 37, 0, 0, 0, 
90, 0, - 0, 3, 0, 96, 16, 0, - 0, 0, 0, 0, 88, 24, - 0, 4, 0, 112, 16, 0, - 0, 0, 0, 0, 85, 85, - 0, 0, 98, 16, 0, 3, - 50, 16, 16, 0, 0, 0, - 0, 0, 101, 0, 0, 3, - 242, 32, 16, 0, 0, 0, - 0, 0, 104, 0, 0, 2, - 1, 0, 0, 0, 69, 0, - 0, 9, 242, 0, 16, 0, - 0, 0, 0, 0, 70, 16, - 16, 0, 0, 0, 0, 0, - 70, 126, 16, 0, 0, 0, - 0, 0, 0, 96, 16, 0, - 0, 0, 0, 0, 54, 0, - 0, 5, 114, 32, 16, 0, - 0, 0, 0, 0, 70, 2, - 16, 0, 0, 0, 0, 0, - 54, 0, 0, 5, 130, 32, - 16, 0, 0, 0, 0, 0, - 1, 64, 0, 0, 0, 0, - 128, 63, 62, 0, 0, 1, - 83, 84, 65, 84, 116, 0, - 0, 0, 4, 0, 0, 0, - 1, 0, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 -}; diff --git a/src/util/display_ps_alpha.hlsl.h b/src/util/display_ps_alpha.hlsl.h deleted file mode 100644 index f863af159..000000000 --- a/src/util/display_ps_alpha.hlsl.h +++ /dev/null @@ -1,131 +0,0 @@ -#if 0 -// -// Generated by Microsoft (R) HLSL Shader Compiler 10.1 -// -// -// Resource Bindings: -// -// Name Type Format Dim HLSL Bind Count -// ------------------------------ ---------- ------- ----------- -------------- ------ -// samp0_ss sampler NA NA s0 1 -// samp0 texture float4 2d t0 1 -// -// -// -// Input signature: -// -// Name Index Mask Register SysValue Format Used -// -------------------- ----- ------ -------- -------- ------- ------ -// TEXCOORD 0 xy 0 NONE float xy -// -// -// Output signature: -// -// Name Index Mask Register SysValue Format Used -// -------------------- ----- ------ -------- -------- ------- ------ -// SV_Target 0 xyzw 0 TARGET float xyzw -// -ps_4_0 -dcl_sampler s0, mode_default -dcl_resource_texture2d (float,float,float,float) t0 -dcl_input_ps linear v0.xy -dcl_output o0.xyzw -sample o0.xyzw, v0.xyxx, 
t0.xyzw, s0 -ret -// Approximately 2 instruction slots used -#endif - -const BYTE static s_display_ps_alpha_bytecode[] = -{ - 68, 88, 66, 67, 140, 134, - 46, 29, 68, 36, 193, 23, - 94, 171, 102, 123, 183, 66, - 19, 177, 1, 0, 0, 0, - 32, 2, 0, 0, 5, 0, - 0, 0, 52, 0, 0, 0, - 208, 0, 0, 0, 4, 1, - 0, 0, 56, 1, 0, 0, - 164, 1, 0, 0, 82, 68, - 69, 70, 148, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, - 28, 0, 0, 0, 0, 4, - 255, 255, 0, 129, 0, 0, - 107, 0, 0, 0, 92, 0, - 0, 0, 3, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 101, 0, 0, 0, 2, 0, - 0, 0, 5, 0, 0, 0, - 4, 0, 0, 0, 255, 255, - 255, 255, 0, 0, 0, 0, - 1, 0, 0, 0, 13, 0, - 0, 0, 115, 97, 109, 112, - 48, 95, 115, 115, 0, 115, - 97, 109, 112, 48, 0, 77, - 105, 99, 114, 111, 115, 111, - 102, 116, 32, 40, 82, 41, - 32, 72, 76, 83, 76, 32, - 83, 104, 97, 100, 101, 114, - 32, 67, 111, 109, 112, 105, - 108, 101, 114, 32, 49, 48, - 46, 49, 0, 171, 73, 83, - 71, 78, 44, 0, 0, 0, - 1, 0, 0, 0, 8, 0, - 0, 0, 32, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, - 0, 0, 0, 0, 3, 3, - 0, 0, 84, 69, 88, 67, - 79, 79, 82, 68, 0, 171, - 171, 171, 79, 83, 71, 78, - 44, 0, 0, 0, 1, 0, - 0, 0, 8, 0, 0, 0, - 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, - 0, 0, 15, 0, 0, 0, - 83, 86, 95, 84, 97, 114, - 103, 101, 116, 0, 171, 171, - 83, 72, 68, 82, 100, 0, - 0, 0, 64, 0, 0, 0, - 25, 0, 0, 0, 90, 0, - 0, 3, 0, 96, 16, 0, - 0, 0, 0, 0, 88, 24, - 0, 4, 0, 112, 16, 0, - 0, 0, 0, 0, 85, 85, - 0, 0, 98, 16, 0, 3, - 50, 16, 16, 0, 0, 0, - 0, 0, 101, 0, 0, 3, - 242, 32, 16, 0, 0, 0, - 0, 0, 69, 0, 0, 9, - 242, 32, 16, 0, 0, 0, - 0, 0, 70, 16, 16, 0, - 0, 0, 0, 0, 70, 126, - 16, 0, 0, 0, 0, 0, - 0, 96, 16, 0, 0, 0, - 0, 0, 62, 0, 0, 1, - 83, 84, 65, 84, 116, 0, - 0, 0, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 
- 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0 -}; diff --git a/src/util/display_vs.hlsl b/src/util/display_vs.hlsl deleted file mode 100644 index 5b071d4f1..000000000 --- a/src/util/display_vs.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -cbuffer UBOBlock : register(b0) -{ - float4 u_src_rect; -}; - -void main(in uint vertex_id : SV_VertexID, - out float2 v_tex0 : TEXCOORD0, - out float4 o_pos : SV_Position) -{ - float2 pos = float2(float((vertex_id << 1) & 2u), float(vertex_id & 2u)); - v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; - o_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); -} diff --git a/src/util/display_vs.hlsl.h b/src/util/display_vs.hlsl.h deleted file mode 100644 index 3bac3ea39..000000000 --- a/src/util/display_vs.hlsl.h +++ /dev/null @@ -1,197 +0,0 @@ -#if 0 -// -// Generated by Microsoft (R) HLSL Shader Compiler 10.1 -// -// -// Buffer Definitions: -// -// cbuffer UBOBlock -// { -// -// float4 u_src_rect; // Offset: 0 Size: 16 -// -// } -// -// -// Resource Bindings: -// -// Name Type Format Dim HLSL Bind Count -// ------------------------------ ---------- ------- ----------- -------------- ------ -// UBOBlock cbuffer NA NA cb0 1 -// -// -// -// Input signature: -// -// Name Index Mask Register SysValue Format Used -// -------------------- ----- ------ -------- -------- ------- ------ -// SV_VertexID 0 x 0 VERTID uint x -// -// -// Output signature: -// -// Name Index Mask Register SysValue Format Used -// -------------------- ----- ------ -------- -------- ------- ------ -// TEXCOORD 0 xy 0 NONE float xy -// SV_Position 0 xyzw 1 POS float xyzw -// -vs_4_0 -dcl_constantbuffer CB0[1], immediateIndexed -dcl_input_sgv v0.x, vertex_id -dcl_output o0.xy -dcl_output_siv o1.xyzw, position -dcl_temps 1 -ishl r0.x, v0.x, l(1) -and r0.x, r0.x, l(2) -and r0.z, v0.x, l(2) -utof r0.xy, r0.xzxx -mad o0.xy, r0.xyxx, 
cb0[0].zwzz, cb0[0].xyxx -mad o1.xy, r0.xyxx, l(2.000000, -2.000000, 0.000000, 0.000000), l(-1.000000, 1.000000, 0.000000, 0.000000) -mov o1.zw, l(0,0,0,1.000000) -ret -// Approximately 8 instruction slots used -#endif - -const BYTE static s_display_vs_bytecode[] = -{ - 68, 88, 66, 67, 37, 97, - 157, 234, 112, 10, 38, 98, - 114, 228, 143, 118, 71, 158, - 122, 195, 1, 0, 0, 0, - 72, 3, 0, 0, 5, 0, - 0, 0, 52, 0, 0, 0, - 248, 0, 0, 0, 44, 1, - 0, 0, 132, 1, 0, 0, - 204, 2, 0, 0, 82, 68, - 69, 70, 188, 0, 0, 0, - 1, 0, 0, 0, 72, 0, - 0, 0, 1, 0, 0, 0, - 28, 0, 0, 0, 0, 4, - 254, 255, 0, 129, 0, 0, - 148, 0, 0, 0, 60, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, - 85, 66, 79, 66, 108, 111, - 99, 107, 0, 171, 171, 171, - 60, 0, 0, 0, 1, 0, - 0, 0, 96, 0, 0, 0, - 16, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 120, 0, 0, 0, 0, 0, - 0, 0, 16, 0, 0, 0, - 2, 0, 0, 0, 132, 0, - 0, 0, 0, 0, 0, 0, - 117, 95, 115, 114, 99, 95, - 114, 101, 99, 116, 0, 171, - 1, 0, 3, 0, 1, 0, - 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 77, 105, - 99, 114, 111, 115, 111, 102, - 116, 32, 40, 82, 41, 32, - 72, 76, 83, 76, 32, 83, - 104, 97, 100, 101, 114, 32, - 67, 111, 109, 112, 105, 108, - 101, 114, 32, 49, 48, 46, - 49, 0, 73, 83, 71, 78, - 44, 0, 0, 0, 1, 0, - 0, 0, 8, 0, 0, 0, - 32, 0, 0, 0, 0, 0, - 0, 0, 6, 0, 0, 0, - 1, 0, 0, 0, 0, 0, - 0, 0, 1, 1, 0, 0, - 83, 86, 95, 86, 101, 114, - 116, 101, 120, 73, 68, 0, - 79, 83, 71, 78, 80, 0, - 0, 0, 2, 0, 0, 0, - 8, 0, 0, 0, 56, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, - 0, 0, 0, 0, 0, 0, - 3, 12, 0, 0, 65, 0, - 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 3, 0, - 0, 0, 1, 0, 0, 0, - 15, 0, 0, 0, 84, 69, - 88, 67, 79, 79, 82, 68, - 0, 83, 86, 95, 80, 111, - 115, 105, 116, 105, 111, 110, - 0, 171, 171, 171, 83, 72, - 68, 82, 64, 1, 0, 0, - 64, 0, 1, 0, 80, 0, - 0, 0, 89, 0, 0, 4, - 70, 142, 32, 0, 0, 0, - 0, 0, 1, 0, 0, 0, - 96, 0, 0, 4, 18, 16, - 16, 0, 0, 0, 0, 0, - 6, 0, 0, 0, 101, 0, - 0, 3, 50, 32, 16, 0, - 0, 0, 0, 
0, 103, 0, - 0, 4, 242, 32, 16, 0, - 1, 0, 0, 0, 1, 0, - 0, 0, 104, 0, 0, 2, - 1, 0, 0, 0, 41, 0, - 0, 7, 18, 0, 16, 0, - 0, 0, 0, 0, 10, 16, - 16, 0, 0, 0, 0, 0, - 1, 64, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 7, - 18, 0, 16, 0, 0, 0, - 0, 0, 10, 0, 16, 0, - 0, 0, 0, 0, 1, 64, - 0, 0, 2, 0, 0, 0, - 1, 0, 0, 7, 66, 0, - 16, 0, 0, 0, 0, 0, - 10, 16, 16, 0, 0, 0, - 0, 0, 1, 64, 0, 0, - 2, 0, 0, 0, 86, 0, - 0, 5, 50, 0, 16, 0, - 0, 0, 0, 0, 134, 0, - 16, 0, 0, 0, 0, 0, - 50, 0, 0, 11, 50, 32, - 16, 0, 0, 0, 0, 0, - 70, 0, 16, 0, 0, 0, - 0, 0, 230, 138, 32, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 70, 128, 32, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 50, 0, 0, 15, - 50, 32, 16, 0, 1, 0, - 0, 0, 70, 0, 16, 0, - 0, 0, 0, 0, 2, 64, - 0, 0, 0, 0, 0, 64, - 0, 0, 0, 192, 0, 0, - 0, 0, 0, 0, 0, 0, - 2, 64, 0, 0, 0, 0, - 128, 191, 0, 0, 128, 63, - 0, 0, 0, 0, 0, 0, - 0, 0, 54, 0, 0, 8, - 194, 32, 16, 0, 1, 0, - 0, 0, 2, 64, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 128, 63, 62, 0, - 0, 1, 83, 84, 65, 84, - 116, 0, 0, 0, 8, 0, - 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 3, 0, - 0, 0, 2, 0, 0, 0, - 1, 0, 0, 0, 2, 0, - 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0 -}; diff --git a/src/common/gl/context.cpp b/src/util/gl/context.cpp similarity index 87% rename from src/common/gl/context.cpp rename to src/util/gl/context.cpp index 266a2b976..fe577336b 100644 --- a/src/common/gl/context.cpp +++ b/src/util/gl/context.cpp @@ -2,8 +2,10 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context.h" -#include "../log.h" -#include "loader.h" +#include "../opengl_loader.h" + +#include "common/log.h" + #include #include #ifdef __APPLE__ @@ -11,38 +13,23 @@ #else #include #endif -Log_SetChannel(GL::Context); #if defined(_WIN32) && !defined(_M_ARM64) 
#include "context_wgl.h" #elif defined(__APPLE__) #include "context_agl.h" -#endif - +#else #ifdef USE_EGL -#if defined(USE_WAYLAND) || defined(USE_GBM) || defined(USE_FBDEV) || defined(USE_X11) -#if defined(USE_WAYLAND) +#ifdef USE_WAYLAND #include "context_egl_wayland.h" #endif -#if defined(USE_GBM) -#include "context_egl_gbm.h" -#endif -#if defined(USE_FBDEV) -#include "context_egl_fbdev.h" -#endif -#if defined(USE_X11) +#ifdef USE_X11 #include "context_egl_x11.h" #endif -#elif defined(ANDROID) -#include "context_egl_android.h" -#else -#error Unknown EGL platform #endif #endif -#ifdef USE_GLX -#include "context_glx.h" -#endif +Log_SetChannel(GL::Context); namespace GL { @@ -96,7 +83,9 @@ static void DisableBrokenExtensions(const char* gl_vendor, const char* gl_render } } -Context::Context(const WindowInfo& wi) : m_wi(wi) {} +Context::Context(const WindowInfo& wi) : m_wi(wi) +{ +} Context::~Context() = default; @@ -131,40 +120,15 @@ std::unique_ptr Context::Create(const WindowInfo& wi, const Version context = ContextWGL::Create(wi, versions_to_try, num_versions_to_try); #elif defined(__APPLE__) context = ContextAGL::Create(wi, versions_to_try, num_versions_to_try); -#elif defined(ANDROID) -#ifdef USE_EGL - context = ContextEGLAndroid::Create(wi, versions_to_try, num_versions_to_try); -#endif -#endif - +#else #if defined(USE_X11) if (wi.type == WindowInfo::Type::X11) - { -#ifdef USE_EGL - const char* use_glx = std::getenv("USE_GLX"); - if (use_glx && std::strcmp(use_glx, "1") == 0) - context = ContextGLX::Create(wi, versions_to_try, num_versions_to_try); - else - context = ContextEGLX11::Create(wi, versions_to_try, num_versions_to_try); -#else - context = ContextGLX::Create(wi, versions_to_try, num_versions_to_try); + context = ContextEGLX11::Create(wi, versions_to_try, num_versions_to_try); #endif - } -#endif - #if defined(USE_WAYLAND) if (wi.type == WindowInfo::Type::Wayland) context = ContextEGLWayland::Create(wi, versions_to_try, num_versions_to_try); #endif - 
-#if defined(USE_GBM) - if (wi.type == WindowInfo::Type::Display) - context = ContextEGLGBM::Create(wi, versions_to_try, num_versions_to_try); -#endif - -#if defined(USE_FBDEV) - if (wi.type == WindowInfo::Type::Display) - context = ContextEGLFBDev::Create(wi, versions_to_try, num_versions_to_try); #endif if (!context) diff --git a/src/common/gl/context.h b/src/util/gl/context.h similarity index 94% rename from src/common/gl/context.h rename to src/util/gl/context.h index f4c90311c..75e4a9848 100644 --- a/src/common/gl/context.h +++ b/src/util/gl/context.h @@ -2,8 +2,11 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "../types.h" + #include "../window_info.h" + +#include "common/types.h" + #include #include #include @@ -40,7 +43,7 @@ public: ALWAYS_INLINE bool IsGLES() const { return (m_version.profile == Profile::ES); } ALWAYS_INLINE u32 GetSurfaceWidth() const { return m_wi.surface_width; } ALWAYS_INLINE u32 GetSurfaceHeight() const { return m_wi.surface_height; } - ALWAYS_INLINE WindowInfo::SurfaceFormat GetSurfaceFormat() const { return m_wi.surface_format; } + ALWAYS_INLINE GPUTexture::Format GetSurfaceFormat() const { return m_wi.surface_format; } virtual void* GetProcAddress(const char* name) = 0; virtual bool ChangeSurface(const WindowInfo& new_wi) = 0; diff --git a/src/common/gl/context_agl.h b/src/util/gl/context_agl.h similarity index 98% rename from src/common/gl/context_agl.h rename to src/util/gl/context_agl.h index f26547e33..526ec878e 100644 --- a/src/common/gl/context_agl.h +++ b/src/util/gl/context_agl.h @@ -3,7 +3,7 @@ #pragma once #include "context.h" -#include "loader.h" +#include "../opengl_loader.h" #if defined(__APPLE__) && defined(__OBJC__) #import diff --git a/src/common/gl/context_agl.mm b/src/util/gl/context_agl.mm similarity index 98% rename from src/common/gl/context_agl.mm rename to src/util/gl/context_agl.mm index 96ab9b135..4cd76a711 100644 --- a/src/common/gl/context_agl.mm +++ 
b/src/util/gl/context_agl.mm @@ -2,9 +2,8 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context_agl.h" -#include "../assert.h" -#include "../log.h" -#include "loader.h" +#include "common/assert.h" +#include "common/log.h" #include Log_SetChannel(GL::ContextAGL); diff --git a/src/common/gl/context_egl.cpp b/src/util/gl/context_egl.cpp similarity index 88% rename from src/common/gl/context_egl.cpp rename to src/util/gl/context_egl.cpp index 8bf4ca040..f5576e376 100644 --- a/src/common/gl/context_egl.cpp +++ b/src/util/gl/context_egl.cpp @@ -1,15 +1,20 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context_egl.h" -#include "../assert.h" -#include "../log.h" + +#include "common/assert.h" +#include "common/log.h" + #include #include + Log_SetChannel(GL::ContextEGL); namespace GL { -ContextEGL::ContextEGL(const WindowInfo& wi) : Context(wi) {} +ContextEGL::ContextEGL(const WindowInfo& wi) : Context(wi) +{ +} ContextEGL::~ContextEGL() { @@ -231,7 +236,7 @@ bool ContextEGL::CreatePBufferSurface() return true; } -bool ContextEGL::CheckConfigSurfaceFormat(EGLConfig config, WindowInfo::SurfaceFormat format) const +bool ContextEGL::CheckConfigSurfaceFormat(EGLConfig config, GPUTexture::Format format) { int red_size, green_size, blue_size, alpha_size; if (!eglGetConfigAttrib(m_display, config, EGL_RED_SIZE, &red_size) || @@ -244,18 +249,38 @@ bool ContextEGL::CheckConfigSurfaceFormat(EGLConfig config, WindowInfo::SurfaceF switch (format) { - case WindowInfo::SurfaceFormat::Auto: + case GPUTexture::Format::Unknown: + { + if (red_size == 5 && green_size == 6 && red_size == 5) + { + m_wi.surface_format = GPUTexture::Format::RGB565; + } + else if (red_size == 5 && green_size == 5 && red_size == 5 && alpha_size == 1) + { + m_wi.surface_format = GPUTexture::Format::RGBA5551; + } + else if (red_size == 8 && green_size == 8 && 
blue_size == 8 && alpha_size == 8) + { + m_wi.surface_format = GPUTexture::Format::RGBA8; + } + else + { + Log_ErrorPrintf("Unknown surface format: R=%u, G=%u, B=%u, A=%u", red_size, green_size, blue_size, alpha_size); + m_wi.surface_format = GPUTexture::Format::RGBA8; + } + return true; + } - case WindowInfo::SurfaceFormat::RGB8: - return (red_size == 8 && green_size == 8 && blue_size == 8); - - case WindowInfo::SurfaceFormat::RGBA8: + case GPUTexture::Format::RGBA8: return (red_size == 8 && green_size == 8 && blue_size == 8 && alpha_size == 8); - case WindowInfo::SurfaceFormat::RGB565: + case GPUTexture::Format::RGB565: return (red_size == 5 && green_size == 6 && blue_size == 5); + case GPUTexture::Format::RGBA5551: + return (red_size == 5 && green_size == 5 && blue_size == 5 && alpha_size == 1); + default: return false; } @@ -301,18 +326,16 @@ bool ContextEGL::CreateContext(const Version& version, EGLContext share_context) }; int nsurface_attribs = 4; + const GPUTexture::Format format = m_wi.surface_format; + if (format == GPUTexture::Format::Unknown) + { + Log_WarningPrint("Surface format not specified, assuming RGBA8."); + m_wi.surface_format = GPUTexture::Format::RGBA8; + } + switch (m_wi.surface_format) { - case WindowInfo::SurfaceFormat::RGB8: - surface_attribs[nsurface_attribs++] = EGL_RED_SIZE; - surface_attribs[nsurface_attribs++] = 8; - surface_attribs[nsurface_attribs++] = EGL_GREEN_SIZE; - surface_attribs[nsurface_attribs++] = 8; - surface_attribs[nsurface_attribs++] = EGL_BLUE_SIZE; - surface_attribs[nsurface_attribs++] = 8; - break; - - case WindowInfo::SurfaceFormat::RGBA8: + case GPUTexture::Format::RGBA8: surface_attribs[nsurface_attribs++] = EGL_RED_SIZE; surface_attribs[nsurface_attribs++] = 8; surface_attribs[nsurface_attribs++] = EGL_GREEN_SIZE; @@ -323,7 +346,7 @@ bool ContextEGL::CreateContext(const Version& version, EGLContext share_context) surface_attribs[nsurface_attribs++] = 8; break; - case WindowInfo::SurfaceFormat::RGB565: + case 
GPUTexture::Format::RGB565: surface_attribs[nsurface_attribs++] = EGL_RED_SIZE; surface_attribs[nsurface_attribs++] = 5; surface_attribs[nsurface_attribs++] = EGL_GREEN_SIZE; @@ -332,7 +355,7 @@ bool ContextEGL::CreateContext(const Version& version, EGLContext share_context) surface_attribs[nsurface_attribs++] = 5; break; - case WindowInfo::SurfaceFormat::Auto: + case GPUTexture::Format::Unknown: break; default: diff --git a/src/common/gl/context_egl.h b/src/util/gl/context_egl.h similarity index 91% rename from src/common/gl/context_egl.h rename to src/util/gl/context_egl.h index eb228d855..123aaefb8 100644 --- a/src/common/gl/context_egl.h +++ b/src/util/gl/context_egl.h @@ -1,8 +1,10 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + #include "context.h" + #include "glad_egl.h" namespace GL { @@ -36,7 +38,7 @@ protected: bool CreateContextAndSurface(const Version& version, EGLContext share_context, bool make_current); bool CreateSurface(); bool CreatePBufferSurface(); - bool CheckConfigSurfaceFormat(EGLConfig config, WindowInfo::SurfaceFormat format) const; + bool CheckConfigSurfaceFormat(EGLConfig config, GPUTexture::Format format); void DestroyContext(); void DestroySurface(); diff --git a/src/common/gl/context_egl_wayland.cpp b/src/util/gl/context_egl_wayland.cpp similarity index 99% rename from src/common/gl/context_egl_wayland.cpp rename to src/util/gl/context_egl_wayland.cpp index 19a50ae96..bca5d5bf1 100644 --- a/src/common/gl/context_egl_wayland.cpp +++ b/src/util/gl/context_egl_wayland.cpp @@ -2,8 +2,11 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context_egl_wayland.h" -#include "../log.h" + +#include "common/log.h" + #include + Log_SetChannel(ContextEGLWayland); namespace GL { diff --git a/src/common/gl/context_egl_wayland.h b/src/util/gl/context_egl_wayland.h similarity index 100% rename from 
src/common/gl/context_egl_wayland.h rename to src/util/gl/context_egl_wayland.h diff --git a/src/common/gl/context_egl_x11.cpp b/src/util/gl/context_egl_x11.cpp similarity index 56% rename from src/common/gl/context_egl_x11.cpp rename to src/util/gl/context_egl_x11.cpp index 18542b310..440c4a755 100644 --- a/src/common/gl/context_egl_x11.cpp +++ b/src/util/gl/context_egl_x11.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context_egl_x11.h" -#include "../log.h" -Log_SetChannel(GL::ContextEGLX11); namespace GL { ContextEGLX11::ContextEGLX11(const WindowInfo& wi) : ContextEGL(wi) {} @@ -32,41 +30,11 @@ std::unique_ptr ContextEGLX11::CreateSharedContext(const WindowInfo& wi void ContextEGLX11::ResizeSurface(u32 new_surface_width, u32 new_surface_height) { - m_window.Resize(); ContextEGL::ResizeSurface(new_surface_width, new_surface_height); } EGLNativeWindowType ContextEGLX11::GetNativeWindow(EGLConfig config) { - X11InhibitErrors ei; - - EGLint native_visual_id = 0; - if (!eglGetConfigAttrib(m_display, m_config, EGL_NATIVE_VISUAL_ID, &native_visual_id)) - { - Log_ErrorPrintf("Failed to get X11 visual ID"); - return false; - } - - XVisualInfo vi_query = {}; - vi_query.visualid = native_visual_id; - - int num_vis; - XVisualInfo* vi = XGetVisualInfo(static_cast(m_wi.display_connection), VisualIDMask, &vi_query, &num_vis); - if (num_vis <= 0 || !vi) - { - Log_ErrorPrintf("Failed to query visual from X11"); - return false; - } - - m_window.Destroy(); - if (!m_window.Create(GetDisplay(), static_cast(reinterpret_cast(m_wi.window_handle)), vi)) - { - Log_ErrorPrintf("Faild to create X11 child window"); - XFree(vi); - return false; - } - - XFree(vi); - return static_cast(m_window.GetWindow()); + return (EGLNativeWindowType)m_wi.window_handle; } } // namespace GL diff --git a/src/common/gl/context_egl_x11.h b/src/util/gl/context_egl_x11.h similarity index 82% rename from src/common/gl/context_egl_x11.h rename to 
src/util/gl/context_egl_x11.h index 486a3bc1f..6bd3d8cae 100644 --- a/src/common/gl/context_egl_x11.h +++ b/src/util/gl/context_egl_x11.h @@ -3,7 +3,6 @@ #pragma once #include "context_egl.h" -#include "x11_window.h" namespace GL { @@ -21,11 +20,6 @@ public: protected: EGLNativeWindowType GetNativeWindow(EGLConfig config) override; - -private: - ALWAYS_INLINE Display* GetDisplay() const { return static_cast(m_wi.display_connection); } - - X11Window m_window; }; } // namespace GL diff --git a/src/common/gl/context_wgl.cpp b/src/util/gl/context_wgl.cpp similarity index 97% rename from src/common/gl/context_wgl.cpp rename to src/util/gl/context_wgl.cpp index 2b5d22b74..e16232ae0 100644 --- a/src/common/gl/context_wgl.cpp +++ b/src/util/gl/context_wgl.cpp @@ -1,15 +1,14 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "context_wgl.h" -#include "../assert.h" -#include "../log.h" -#include "../scoped_guard.h" -#include "loader.h" -Log_SetChannel(GL::ContextWGL); +#include "../opengl_loader.h" -// TODO: get rid of this -#pragma comment(lib, "opengl32.lib") +#include "common/assert.h" +#include "common/log.h" +#include "common/scoped_guard.h" + +Log_SetChannel(GL::ContextWGL); static void* GetProcAddressCallback(const char* name) { @@ -33,7 +32,9 @@ static bool ReloadWGL(HDC dc) } namespace GL { -ContextWGL::ContextWGL(const WindowInfo& wi) : Context(wi) {} +ContextWGL::ContextWGL(const WindowInfo& wi) : Context(wi) +{ +} ContextWGL::~ContextWGL() { @@ -227,6 +228,7 @@ HDC ContextWGL::GetDCAndSetPixelFormat(HWND hwnd) return {}; } + m_wi.surface_format = GPUTexture::Format::RGBA8; return hDC; } diff --git a/src/common/gl/context_wgl.h b/src/util/gl/context_wgl.h similarity index 91% rename from src/common/gl/context_wgl.h rename to src/util/gl/context_wgl.h index 6a032b5a1..d5bc005e6 100644 --- a/src/common/gl/context_wgl.h +++ 
b/src/util/gl/context_wgl.h @@ -1,12 +1,15 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "../windows_headers.h" - #include "context.h" + +#include "../opengl_loader.h" + +#include "common/windows_headers.h" + #include "glad_wgl.h" -#include "loader.h" + #include namespace GL { diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp new file mode 100644 index 000000000..46a82e36a --- /dev/null +++ b/src/util/gpu_device.cpp @@ -0,0 +1,1580 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gpu_device.h" +#include "core/host_settings.h" +#include "core/settings.h" +#include "core/system.h" +#include "postprocessing_chain.h" +#include "shadergen.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/file_system.h" +#include "common/hash_combine.h" +#include "common/heap_array.h" +#include "common/log.h" +#include "common/path.h" +#include "common/string_util.h" +#include "common/timer.h" + +#include "fmt/format.h" +#include "imgui.h" +#include "stb_image.h" +#include "stb_image_resize.h" +#include "stb_image_write.h" + +#include +#include +#include +#include +#include + +Log_SetChannel(GPUDevice); + +#ifdef _WIN32 +#include "common/windows_headers.h" +#include "d3d11_device.h" +#include "d3d12_device.h" +#endif + +#ifdef WITH_OPENGL +#include "opengl_device.h" +#endif + +#ifdef WITH_VULKAN +#include "vulkan_device.h" +#endif + +std::unique_ptr g_gpu_device; + +static std::string s_pipeline_cache_path; + +GPUFramebuffer::GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height) + : m_rt(rt), m_ds(ds), m_width(width), m_height(height) +{ +} + +GPUFramebuffer::~GPUFramebuffer() = default; + +GPUSampler::GPUSampler() = default; + +GPUSampler::~GPUSampler() = default; + +GPUSampler::Config 
GPUSampler::GetNearestConfig() +{ + Config config = {}; + config.address_u = GPUSampler::AddressMode::ClampToEdge; + config.address_v = GPUSampler::AddressMode::ClampToEdge; + config.address_w = GPUSampler::AddressMode::ClampToEdge; + config.min_filter = GPUSampler::Filter::Nearest; + config.mag_filter = GPUSampler::Filter::Nearest; + return config; +} + +GPUSampler::Config GPUSampler::GetLinearConfig() +{ + Config config = {}; + config.address_u = GPUSampler::AddressMode::ClampToEdge; + config.address_v = GPUSampler::AddressMode::ClampToEdge; + config.address_w = GPUSampler::AddressMode::ClampToEdge; + config.min_filter = GPUSampler::Filter::Linear; + config.mag_filter = GPUSampler::Filter::Linear; + return config; +} + +GPUShader::GPUShader(GPUShaderStage stage) : m_stage(stage) +{ +} + +GPUShader::~GPUShader() = default; + +const char* GPUShader::GetStageName(GPUShaderStage stage) +{ + switch (stage) + { + case GPUShaderStage::Vertex: + return "Vertex"; + case GPUShaderStage::Fragment: + return "Fragment"; + case GPUShaderStage::Compute: + return "Compute"; + default: + UnreachableCode(); + return ""; + } +} + +GPUPipeline::GPUPipeline() = default; + +GPUPipeline::~GPUPipeline() = default; + +size_t GPUPipeline::InputLayoutHash::operator()(const InputLayout& il) const +{ + std::size_t h = 0; + hash_combine(h, il.vertex_attributes.size(), il.vertex_stride); + + for (const VertexAttribute& va : il.vertex_attributes) + hash_combine(h, va.key); + + return h; +} + +bool GPUPipeline::InputLayout::operator==(const InputLayout& rhs) const +{ + return (vertex_stride == rhs.vertex_stride && vertex_attributes.size() == rhs.vertex_attributes.size() && + std::memcmp(vertex_attributes.data(), rhs.vertex_attributes.data(), + sizeof(VertexAttribute) * rhs.vertex_attributes.size()) == 0); +} + +bool GPUPipeline::InputLayout::operator!=(const InputLayout& rhs) const +{ + return (vertex_stride != rhs.vertex_stride || + vertex_attributes.size() != rhs.vertex_attributes.size() || // short-circuits before memcmp +
std::memcmp(vertex_attributes.data(), rhs.vertex_attributes.data(), + sizeof(VertexAttribute) * rhs.vertex_attributes.size()) != 0); +} + +GPUPipeline::RasterizationState GPUPipeline::RasterizationState::GetNoCullState() +{ + RasterizationState ret = {}; + ret.cull_mode = CullMode::None; + return ret; +} + +GPUPipeline::DepthState GPUPipeline::DepthState::GetNoTestsState() +{ + DepthState ret = {}; + ret.depth_test = DepthFunc::Always; + return ret; +} + +GPUPipeline::DepthState GPUPipeline::DepthState::GetAlwaysWriteState() +{ + DepthState ret = {}; + ret.depth_test = DepthFunc::Always; + ret.depth_write = true; + return ret; +} + +GPUPipeline::BlendState GPUPipeline::BlendState::GetNoBlendingState() +{ + BlendState ret = {}; + ret.write_mask = 0xf; + return ret; +} + +GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState() +{ + BlendState ret = {}; + ret.enable = true; + ret.src_blend = BlendFunc::SrcAlpha; + ret.dst_blend = BlendFunc::InvSrcAlpha; + ret.blend_op = BlendOp::Add; + ret.src_alpha_blend = BlendFunc::One; + ret.dst_alpha_blend = BlendFunc::Zero; + ret.alpha_blend_op = BlendOp::Add; + ret.write_mask = 0xf; + return ret; +} + +GPUTextureBuffer::GPUTextureBuffer(Format format, u32 size) : m_format(format), m_size_in_elements(size) +{ +} + +GPUTextureBuffer::~GPUTextureBuffer() = default; + +u32 GPUTextureBuffer::GetElementSize(Format format) +{ + static constexpr std::array(Format::MaxCount)> element_size = {{ + sizeof(u16), + }}; + + return element_size[static_cast(format)]; +} + +GPUDevice::~GPUDevice() = default; + +RenderAPI GPUDevice::GetPreferredAPI() +{ +#ifdef _WIN32 + return RenderAPI::D3D11; +#else + return RenderAPI::Metal; +#endif +} + +const char* GPUDevice::RenderAPIToString(RenderAPI api) +{ + // TODO: Combine ES + switch (api) + { + // clang-format off +#define CASE(x) case RenderAPI::x: return #x + CASE(None); + CASE(D3D11); + CASE(D3D12); + CASE(Metal); + CASE(Vulkan); + CASE(OpenGL); + CASE(OpenGLES); +#undef CASE + 
// clang-format on + default: + return "Unknown"; + } +} + +bool GPUDevice::Create(const std::string_view& adapter, const std::string_view& shader_cache_path, + u32 shader_cache_version, bool debug_device, bool vsync, bool threaded_presentation) +{ + m_vsync_enabled = vsync; + m_debug_device = debug_device; + + if (!AcquireWindow(true)) + { + Log_ErrorPrintf("Failed to acquire window from host."); + return false; + } + + if (!CreateDevice(adapter, threaded_presentation)) + { + Log_ErrorPrintf("Failed to create device."); + return false; + } + + Log_InfoPrintf("Graphics Driver Info:\n%s", GetDriverInfo().c_str()); + + OpenShaderCache(shader_cache_path, shader_cache_version); + + if (!CreateResources()) + { + Log_ErrorPrintf("Failed to create base resources."); + return false; + } + + return true; +} + +void GPUDevice::Destroy() +{ + m_post_processing_chain.reset(); + if (HasSurface()) + DestroySurface(); + DestroyResources(); + CloseShaderCache(); + DestroyDevice(); +} + +bool GPUDevice::SupportsExclusiveFullscreen() const +{ + return false; +} + +void GPUDevice::OpenShaderCache(const std::string_view& base_path, u32 version) +{ + if (m_features.shader_cache && !base_path.empty()) + { + const std::string basename = GetShaderCacheBaseName("shaders"); + const std::string filename = Path::Combine(base_path, basename); + if (!m_shader_cache.Open(filename.c_str(), version)) + { + Log_WarningPrintf("Failed to open shader cache. Creating new cache."); + if (!m_shader_cache.Create()) + Log_ErrorPrintf("Failed to create new shader cache."); + + // Squish the pipeline cache too, it's going to be stale. 
+ if (m_features.pipeline_cache) + { + const std::string pc_filename = + Path::Combine(base_path, TinyString::FromFmt("{}.bin", GetShaderCacheBaseName("pipelines"))); + if (FileSystem::FileExists(pc_filename.c_str())) + { + Log_InfoPrintf("Removing old pipeline cache '%s'", pc_filename.c_str()); + FileSystem::DeleteFile(pc_filename.c_str()); + } + } + } + } + else + { + // Still need to set the version - GL needs it. + m_shader_cache.Open(std::string_view(), version); + } + + s_pipeline_cache_path = {}; + if (m_features.pipeline_cache && !base_path.empty()) + { + const std::string basename = GetShaderCacheBaseName("pipelines"); + const std::string filename = Path::Combine(base_path, TinyString::FromFmt("{}.bin", basename)); + if (ReadPipelineCache(filename)) + s_pipeline_cache_path = std::move(filename); + else + Log_WarningPrintf("Failed to read pipeline cache."); + } +} + +void GPUDevice::CloseShaderCache() +{ + m_shader_cache.Close(); + + if (!s_pipeline_cache_path.empty()) + { + DynamicHeapArray data; + if (GetPipelineCacheData(&data)) + { + // Save disk writes if it hasn't changed, think of the poor SSDs. + FILESYSTEM_STAT_DATA sd; + if (!FileSystem::StatFile(s_pipeline_cache_path.c_str(), &sd) || sd.Size != static_cast(data.size())) + { + Log_InfoPrintf("Writing %zu bytes to '%s'", data.size(), s_pipeline_cache_path.c_str()); + if (!FileSystem::WriteBinaryFile(s_pipeline_cache_path.c_str(), data.data(), data.size())) + Log_ErrorPrintf("Failed to write pipeline cache to '%s'", s_pipeline_cache_path.c_str()); + } + else + { + Log_InfoPrintf("Skipping updating pipeline cache '%s' due to no changes.", s_pipeline_cache_path.c_str()); + } + } + + s_pipeline_cache_path = {}; + } +} + +std::string GPUDevice::GetShaderCacheBaseName(const std::string_view& type) const +{ + const std::string_view debug_suffix = m_debug_device ? 
"_debug" : ""; + + std::string ret; + switch (GetRenderAPI()) + { +#ifdef _WIN32 + case RenderAPI::D3D11: + ret = fmt::format("d3d11_{}{}", type, debug_suffix); + break; + case RenderAPI::D3D12: + ret = fmt::format("d3d12_{}{}", type, debug_suffix); + break; +#endif +#ifdef WITH_VULKAN + case RenderAPI::Vulkan: + ret = fmt::format("vulkan_{}{}", type, debug_suffix); + break; +#endif +#ifdef WITH_OPENGL + case RenderAPI::OpenGL: + ret = fmt::format("opengl_{}{}", type, debug_suffix); + break; + case RenderAPI::OpenGLES: + ret = fmt::format("opengles_{}{}", type, debug_suffix); + break; +#endif +#ifdef __APPLE__ + case RenderAPI::Metal: + ret = fmt::format("metal_{}{}", type, debug_suffix); + break; +#endif + default: + UnreachableCode(); + break; + } + + return ret; +} + +bool GPUDevice::ReadPipelineCache(const std::string& filename) +{ + return false; +} + +bool GPUDevice::GetPipelineCacheData(DynamicHeapArray* data) +{ + return false; +} + +bool GPUDevice::AcquireWindow(bool recreate_window) +{ + std::optional wi = Host::AcquireRenderWindow(recreate_window); + if (!wi.has_value()) + return false; + + Log_InfoPrintf("Render window is %ux%u.", wi->surface_width, wi->surface_height); + m_window_info = wi.value(); + return true; +} + +bool GPUDevice::CreateResources() +{ + if (!(m_nearest_sampler = CreateSampler(GPUSampler::GetNearestConfig()))) + return false; + + if (!(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig()))) + return false; + + ShaderGen shadergen(GetRenderAPI(), m_features.dual_source_blend); + + GPUPipeline::GraphicsConfig plconfig; + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.input_layout.vertex_stride = 0; + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.color_format = 
HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8; + plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.samples = 1; + plconfig.per_sample_shading = false; + + std::unique_ptr display_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateDisplayVertexShader()); + std::unique_ptr display_fs = + CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDisplayFragmentShader(true)); + std::unique_ptr cursor_fs = + CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDisplayFragmentShader(false)); + if (!display_vs || !display_fs || !cursor_fs) + return false; + GL_OBJECT_NAME(display_vs, "Display Vertex Shader"); + GL_OBJECT_NAME(display_fs, "Display Fragment Shader"); + GL_OBJECT_NAME(cursor_fs, "Cursor Fragment Shader"); + + plconfig.vertex_shader = display_vs.get(); + plconfig.fragment_shader = display_fs.get(); + if (!(m_display_pipeline = CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_display_pipeline, "Display Pipeline"); + + plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState(); + plconfig.fragment_shader = cursor_fs.get(); + if (!(m_cursor_pipeline = CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_cursor_pipeline, "Cursor Pipeline"); + + std::unique_ptr imgui_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateImGuiVertexShader()); + std::unique_ptr imgui_fs = CreateShader(GPUShaderStage::Fragment, shadergen.GenerateImGuiFragmentShader()); + if (!imgui_vs || !imgui_fs) + return false; + GL_OBJECT_NAME(imgui_vs, "ImGui Vertex Shader"); + GL_OBJECT_NAME(imgui_fs, "ImGui Fragment Shader"); + + static constexpr GPUPipeline::VertexAttribute imgui_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, + GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, pos)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, + GPUPipeline::VertexAttribute::Type::Float, 2, 
offsetof(ImDrawVert, uv)), + GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Semantic::Color, 0, + GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(ImDrawVert, col)), + }; + + plconfig.input_layout.vertex_attributes = imgui_attributes; + plconfig.input_layout.vertex_stride = sizeof(ImDrawVert); + plconfig.vertex_shader = imgui_vs.get(); + plconfig.fragment_shader = imgui_fs.get(); + + m_imgui_pipeline = CreatePipeline(plconfig); + if (!m_imgui_pipeline) + { + Log_ErrorPrintf("Failed to compile ImGui pipeline."); + return false; + } + GL_OBJECT_NAME(m_imgui_pipeline, "ImGui Pipeline"); + + return true; +} + +void GPUDevice::DestroyResources() +{ + m_cursor_texture.reset(); + + m_imgui_font_texture.reset(); + m_imgui_pipeline.reset(); + + m_cursor_pipeline.reset(); + m_display_pipeline.reset(); + m_imgui_pipeline.reset(); + + m_linear_sampler.reset(); + m_nearest_sampler.reset(); + + m_shader_cache.Close(); +} + +bool GPUDevice::SetPostProcessingChain(const std::string_view& config) +{ + m_post_processing_chain.reset(); + + if (config.empty()) + return true; + else if (m_window_info.surface_format == GPUTexture::Format::Unknown) + return false; + + m_post_processing_chain = std::make_unique(); + if (!m_post_processing_chain->CreateFromString(config) || + !m_post_processing_chain->CheckTargets(m_window_info.surface_format, m_window_info.surface_width, + m_window_info.surface_height)) + { + m_post_processing_chain.reset(); + return false; + } + else if (m_post_processing_chain->IsEmpty()) + { + m_post_processing_chain.reset(); + return true; + } + + return true; +} + +void GPUDevice::RenderImGui() +{ + GL_SCOPE("RenderImGui"); + + ImGui::Render(); + + const ImDrawData* draw_data = ImGui::GetDrawData(); + if (draw_data->CmdListsCount == 0) + return; + + SetPipeline(m_imgui_pipeline.get()); + SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); + + const float L = 0.0f; + const float R = 
static_cast(m_window_info.surface_width); + const float T = 0.0f; + const float B = static_cast(m_window_info.surface_height); + const float ortho_projection[4][4] = { + {2.0f / (R - L), 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f / (T - B), 0.0f, 0.0f}, + {0.0f, 0.0f, 0.5f, 0.0f}, + {(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f}, + }; + PushUniformBuffer(ortho_projection, sizeof(ortho_projection)); + + // Render command lists + for (int n = 0; n < draw_data->CmdListsCount; n++) + { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + static_assert(sizeof(ImDrawIdx) == sizeof(DrawIndex)); + + u32 base_vertex, base_index; + UploadVertexBuffer(cmd_list->VtxBuffer.Data, sizeof(ImDrawVert), cmd_list->VtxBuffer.Size, &base_vertex); + UploadIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size, &base_index); + + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) + { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + DebugAssert(!pcmd->UserCallback); + + if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y) + continue; + + SetScissor(static_cast(pcmd->ClipRect.x), static_cast(pcmd->ClipRect.y), + static_cast(pcmd->ClipRect.z - pcmd->ClipRect.x), + static_cast(pcmd->ClipRect.w - pcmd->ClipRect.y)); + SetTextureSampler(0, reinterpret_cast(pcmd->TextureId), m_linear_sampler.get()); + DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset); + } + } +} + +void GPUDevice::UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex) +{ + void* map; + u32 space; + MapVertexBuffer(vertex_size, vertex_count, &map, &space, base_vertex); + std::memcpy(map, vertices, vertex_size * vertex_count); + UnmapVertexBuffer(vertex_size, vertex_count); +} + +void GPUDevice::UploadIndexBuffer(const u16* indices, u32 index_count, u32* base_index) +{ + u16* map; + u32 space; + MapIndexBuffer(index_count, &map, &space, base_index); + std::memcpy(map, 
indices, sizeof(u16) * index_count); + UnmapIndexBuffer(index_count); +} + +void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size) +{ + void* map = MapUniformBuffer(data_size); + std::memcpy(map, data, data_size); + UnmapUniformBuffer(data_size); +} + +void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) +{ + SetViewport(x, y, width, height); + SetScissor(x, y, width, height); +} + +void GPUDevice::ClearRenderTarget(GPUTexture* t, u32 c) +{ + t->SetClearColor(c); +} + +void GPUDevice::ClearDepth(GPUTexture* t, float d) +{ + t->SetClearDepth(d); +} + +void GPUDevice::InvalidateRenderTarget(GPUTexture* t) +{ + t->SetState(GPUTexture::State::Invalidated); +} + +std::unique_ptr GPUDevice::CreateShader(GPUShaderStage stage, const std::string_view& source, + const char* entry_point /* = "main" */) +{ + std::unique_ptr shader; + if (!m_shader_cache.IsOpen()) + { + shader = CreateShaderFromSource(stage, source, entry_point, nullptr); + return shader; + } + + const GPUShaderCache::CacheIndexKey key = m_shader_cache.GetCacheKey(stage, source, entry_point); + DynamicHeapArray binary; + if (m_shader_cache.Lookup(key, &binary)) + { + shader = CreateShaderFromBinary(stage, binary); + if (shader) + return shader; + + Log_ErrorPrintf("Failed to create shader from binary (driver changed?). Clearing cache."); + m_shader_cache.Clear(); + } + + shader = CreateShaderFromSource(stage, source, entry_point, &binary); + if (!shader) + return shader; + + // Don't insert empty shaders into the cache... 
+ if (!binary.empty()) + { + if (!m_shader_cache.Insert(key, binary.data(), static_cast(binary.size()))) + m_shader_cache.Close(); + } + + return shader; +} + +bool GPUDevice::GetRequestedExclusiveFullscreenMode(u32* width, u32* height, float* refresh_rate) +{ + const std::string mode = Host::GetBaseStringSettingValue("GPU", "FullscreenMode", ""); + if (!mode.empty()) + { + const std::string_view mode_view = mode; + std::string_view::size_type sep1 = mode.find('x'); + if (sep1 != std::string_view::npos) + { + std::optional owidth = StringUtil::FromChars(mode_view.substr(0, sep1)); + sep1++; + + while (sep1 < mode.length() && std::isspace(static_cast<unsigned char>(mode[sep1]))) // isspace(negative) is UB + sep1++; + + if (owidth.has_value() && sep1 < mode.length()) + { + std::string_view::size_type sep2 = mode.find('@', sep1); + if (sep2 != std::string_view::npos) + { + std::optional oheight = StringUtil::FromChars(mode_view.substr(sep1, sep2 - sep1)); + sep2++; + + while (sep2 < mode.length() && std::isspace(static_cast<unsigned char>(mode[sep2]))) + sep2++; + + if (oheight.has_value() && sep2 < mode.length()) + { + std::optional orefresh_rate = StringUtil::FromChars(mode_view.substr(sep2)); + if (orefresh_rate.has_value()) + { + *width = owidth.value(); + *height = oheight.value(); + *refresh_rate = orefresh_rate.value(); + return true; + } + } + } + } + } + } + + *width = 0; + *height = 0; + *refresh_rate = 0; + return false; +} + +std::string GPUDevice::GetFullscreenModeString(u32 width, u32 height, float refresh_rate) +{ + return StringUtil::StdStringFromFormat("%u x %u @ %f hz", width, height, refresh_rate); +} + +std::string GPUDevice::GetShaderDumpPath(const std::string_view& name) +{ + return Path::Combine(EmuFolders::Dumps, name); +} + +std::array GPUDevice::RGBA8ToFloat(u32 rgba) +{ + return std::array{static_cast(rgba & UINT32_C(0xFF)) * (1.0f / 255.0f), + static_cast((rgba >> 8) & UINT32_C(0xFF)) * (1.0f / 255.0f), + static_cast((rgba >> 16) & UINT32_C(0xFF)) * (1.0f / 255.0f), + static_cast(rgba >> 24) * (1.0f / 255.0f)}; +} + +bool
GPUDevice::UpdateImGuiFontTexture() +{ + ImGuiIO& io = ImGui::GetIO(); + + unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + + const u32 pitch = sizeof(u32) * width; + + if (m_imgui_font_texture && m_imgui_font_texture->GetWidth() == static_cast(width) && + m_imgui_font_texture->GetHeight() == static_cast(height) && + m_imgui_font_texture->Update(0, 0, static_cast(width), static_cast(height), pixels, pitch)) + { + io.Fonts->SetTexID(m_imgui_font_texture.get()); + return true; + } + + std::unique_ptr new_font = + CreateTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, pixels, pitch); + if (!new_font) + return false; + + m_imgui_font_texture = std::move(new_font); + io.Fonts->SetTexID(m_imgui_font_texture.get()); + return true; +} + +bool GPUDevice::UsesLowerLeftOrigin() const +{ + const RenderAPI api = GetRenderAPI(); + return (api == RenderAPI::OpenGL || api == RenderAPI::OpenGLES); +} + +void GPUDevice::SetDisplayMaxFPS(float max_fps) +{ + m_display_frame_interval = (max_fps > 0.0f) ? (1.0f / max_fps) : 0.0f; +} + +bool GPUDevice::ShouldSkipDisplayingFrame() +{ + if (m_display_frame_interval == 0.0f) + return false; + + const u64 now = Common::Timer::GetCurrentValue(); + const double diff = Common::Timer::ConvertValueToSeconds(now - m_last_frame_displayed_time); + if (diff < m_display_frame_interval) + return true; + + m_last_frame_displayed_time = now; + return false; +} + +void GPUDevice::ThrottlePresentation() +{ + const float throttle_rate = (m_window_info.surface_refresh_rate > 0.0f) ? m_window_info.surface_refresh_rate : 60.0f; + + const u64 sleep_period = Common::Timer::ConvertNanosecondsToValue(1e+9f / static_cast(throttle_rate)); + const u64 current_ts = Common::Timer::GetCurrentValue(); + + // Allow it to fall behind/run ahead up to 2*period. Sleep isn't that precise, plus we need to + // allow time for the actual rendering. 
+ const u64 max_variance = sleep_period * 2; + if (static_cast(std::abs(static_cast(current_ts - m_last_frame_displayed_time))) > max_variance) + m_last_frame_displayed_time = current_ts + sleep_period; + else + m_last_frame_displayed_time += sleep_period; + + Common::Timer::SleepUntil(m_last_frame_displayed_time, false); +} + +void GPUDevice::ClearDisplayTexture() +{ + m_display_texture = nullptr; + m_display_texture_view_x = 0; + m_display_texture_view_y = 0; + m_display_texture_view_width = 0; + m_display_texture_view_height = 0; + m_display_changed = true; +} + +void GPUDevice::SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height) +{ + DebugAssert(texture); + m_display_texture = texture; + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; + m_display_changed = true; +} + +void GPUDevice::SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height) +{ + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; + m_display_changed = true; +} + +void GPUDevice::SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, + s32 active_width, s32 active_height, float display_aspect_ratio) +{ + m_display_width = display_width; + m_display_height = display_height; + m_display_active_left = active_left; + m_display_active_top = active_top; + m_display_active_width = active_width; + m_display_active_height = active_height; + m_display_aspect_ratio = display_aspect_ratio; + m_display_changed = true; +} + +bool GPUDevice::GetHostRefreshRate(float* refresh_rate) +{ + if (m_window_info.surface_refresh_rate > 0.0f) + { + *refresh_rate = m_window_info.surface_refresh_rate; + return true; + } + + return WindowInfo::QueryRefreshRateForWindow(m_window_info, refresh_rate); 
+} + +bool GPUDevice::SetGPUTimingEnabled(bool enabled) +{ + return false; +} + +float GPUDevice::GetAndResetAccumulatedGPUTime() +{ + return 0.0f; +} + +void GPUDevice::SetSoftwareCursor(std::unique_ptr texture, float scale /*= 1.0f*/) +{ + if (texture) + texture->MakeReadyForSampling(); + + m_cursor_texture = std::move(texture); + m_cursor_texture_scale = scale; +} + +bool GPUDevice::SetSoftwareCursor(const void* pixels, u32 width, u32 height, u32 stride, float scale /*= 1.0f*/) +{ + std::unique_ptr tex = + CreateTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, pixels, stride, false); + if (!tex) + return false; + + SetSoftwareCursor(std::move(tex), scale); + return true; +} + +bool GPUDevice::SetSoftwareCursor(const char* path, float scale /*= 1.0f*/) +{ + auto fp = FileSystem::OpenManagedCFile(path, "rb"); + if (!fp) + { + return false; + } + + int width, height, file_channels; + u8* pixel_data = stbi_load_from_file(fp.get(), &width, &height, &file_channels, 4); + if (!pixel_data) + { + const char* error_reason = stbi_failure_reason(); + Log_ErrorPrintf("Failed to load image from '%s': %s", path, error_reason ? error_reason : "unknown error"); + return false; + } + + std::unique_ptr tex = + CreateTexture(static_cast(width), static_cast(height), 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, pixel_data, sizeof(u32) * static_cast(width), false); + stbi_image_free(pixel_data); + if (!tex) + return false; + + Log_InfoPrintf("Loaded %dx%d image from '%s' for software cursor", width, height, path); + SetSoftwareCursor(std::move(tex), scale); + return true; +} + +void GPUDevice::ClearSoftwareCursor() +{ + m_cursor_texture.reset(); + m_cursor_texture_scale = 1.0f; +} + +bool GPUDevice::IsUsingLinearFiltering() const +{ + return g_settings.display_linear_filtering; +} + +bool GPUDevice::Render(bool skip_present) +{ + // Moved here because there can be draws after UpdateDisplay(). 
+ if (HasDisplayTexture()) + m_display_texture->MakeReadyForSampling(); + + if (skip_present) + { + // Should never return true here.. + if (UNLIKELY(BeginPresent(skip_present))) + Panic("BeginPresent() returned true when skipping..."); + + // Need to kick ImGui state. + ImGui::Render(); + return false; + } + + bool render_frame; + if (HasDisplayTexture()) + { + const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); + render_frame = RenderDisplay(nullptr, left, top, width, height, m_display_texture, m_display_texture_view_x, + m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, + IsUsingLinearFiltering()); + } + else + { + render_frame = BeginPresent(false); + } + + if (!render_frame) + { + // Window minimized etc. + ImGui::Render(); + return false; + } + + SetViewportAndScissor(0, 0, GetWindowWidth(), GetWindowHeight()); + + RenderImGui(); + RenderSoftwareCursor(); + + EndPresent(); + return true; +} + +bool GPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, + std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) +{ + const GPUTexture::Format hdformat = HasSurface() ? 
m_window_info.surface_format : GPUTexture::Format::RGBA8; + + std::unique_ptr render_texture = + CreateTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); + if (!render_texture) + return false; + + std::unique_ptr render_fb = CreateFramebuffer(render_texture.get()); + if (!render_fb) + return false; + + ClearRenderTarget(render_texture.get(), 0); + + RenderDisplay(render_fb.get(), draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), + m_display_texture, m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, + m_display_texture_view_height, IsUsingLinearFiltering()); + + SetFramebuffer(nullptr); + + const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; + out_pixels->resize(width * height); + if (!DownloadTexture(render_texture.get(), 0, 0, width, height, out_pixels->data(), stride)) + return false; + + *out_stride = stride; + *out_format = hdformat; + return true; +} + +bool GPUDevice::RenderDisplay(GPUFramebuffer* target, s32 left, s32 top, s32 width, s32 height, GPUTexture* texture, + s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, + bool linear_filter) +{ + GL_SCOPE("RenderDisplay: %dx%d at %d,%d", left, top, width, height); + + const GPUTexture::Format hdformat = + (target && target->GetRT()) ? target->GetRT()->GetFormat() : m_window_info.surface_format; + const u32 target_width = target ? target->GetWidth() : m_window_info.surface_width; + const u32 target_height = target ? 
target->GetHeight() : m_window_info.surface_height; + const bool postfx = + (m_post_processing_chain && m_post_processing_chain->CheckTargets(hdformat, target_width, target_height)); + if (postfx) + { + ClearRenderTarget(m_post_processing_chain->GetInputTexture(), 0); + SetFramebuffer(m_post_processing_chain->GetInputFramebuffer()); + } + else + { + if (target) + SetFramebuffer(target); + else if (!BeginPresent(false)) + return false; + } + + SetPipeline(m_display_pipeline.get()); + SetTextureSampler(0, texture, linear_filter ? m_linear_sampler.get() : m_nearest_sampler.get()); + + const bool linear = IsUsingLinearFiltering(); + const float position_adjust = linear ? 0.5f : 0.0f; + const float size_adjust = linear ? 1.0f : 0.0f; + const float uniforms[4] = { + (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), + (static_cast(texture_view_y) + position_adjust) / static_cast(texture->GetHeight()), + (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), + (static_cast(texture_view_height) - size_adjust) / static_cast(texture->GetHeight())}; + PushUniformBuffer(uniforms, sizeof(uniforms)); + + SetViewportAndScissor(left, top, width, height); + Draw(3, 0); + + if (postfx) + { + return m_post_processing_chain->Apply(target, left, top, width, height, texture_view_width, texture_view_height); + } + else + { + return true; + } +} + +void GPUDevice::RenderSoftwareCursor() +{ + if (!HasSoftwareCursor()) + return; + + const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); + RenderSoftwareCursor(left, top, width, height, m_cursor_texture.get()); +} + +void GPUDevice::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture) +{ + SetPipeline(m_display_pipeline.get()); + SetTextureSampler(0, texture, m_linear_sampler.get()); + + const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; + PushUniformBuffer(uniforms, sizeof(uniforms)); + + SetViewportAndScissor(left, top, width, 
height); + Draw(3, 0); +} + +void GPUDevice::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, + float* out_width, float* out_height, float* out_left_padding, float* out_top_padding, + float* out_scale, float* out_x_scale, bool apply_aspect_ratio /* = true */) const +{ + const float window_ratio = static_cast(window_width) / static_cast(window_height); + const float display_aspect_ratio = g_settings.display_stretch ? window_ratio : m_display_aspect_ratio; + const float x_scale = + apply_aspect_ratio ? + (display_aspect_ratio / (static_cast(m_display_width) / static_cast(m_display_height))) : + 1.0f; + const float display_width = g_settings.display_stretch_vertically ? static_cast(m_display_width) : + static_cast(m_display_width) * x_scale; + const float display_height = g_settings.display_stretch_vertically ? static_cast(m_display_height) / x_scale : + static_cast(m_display_height); + const float active_left = g_settings.display_stretch_vertically ? static_cast(m_display_active_left) : + static_cast(m_display_active_left) * x_scale; + const float active_top = g_settings.display_stretch_vertically ? static_cast(m_display_active_top) / x_scale : + static_cast(m_display_active_top); + const float active_width = g_settings.display_stretch_vertically ? + static_cast(m_display_active_width) : + static_cast(m_display_active_width) * x_scale; + const float active_height = g_settings.display_stretch_vertically ? 
+ static_cast(m_display_active_height) / x_scale : + static_cast(m_display_active_height); + if (out_x_scale) + *out_x_scale = x_scale; + + // now fit it within the window + float scale; + if ((display_width / display_height) >= window_ratio) + { + // align in middle vertically + scale = static_cast(window_width) / display_width; + if (g_settings.display_integer_scaling) + scale = std::max(std::floor(scale), 1.0f); + + if (out_left_padding) + { + if (g_settings.display_integer_scaling) + *out_left_padding = std::max((static_cast(window_width) - display_width * scale) / 2.0f, 0.0f); + else + *out_left_padding = 0.0f; + } + if (out_top_padding) + { + switch (g_settings.display_alignment) + { + case DisplayAlignment::RightOrBottom: + *out_top_padding = std::max(static_cast(window_height) - (display_height * scale), 0.0f); + break; + + case DisplayAlignment::Center: + *out_top_padding = + std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); + break; + + case DisplayAlignment::LeftOrTop: + default: + *out_top_padding = 0.0f; + break; + } + } + } + else + { + // align in middle horizontally + scale = static_cast(window_height) / display_height; + if (g_settings.display_integer_scaling) + scale = std::max(std::floor(scale), 1.0f); + + if (out_left_padding) + { + switch (g_settings.display_alignment) + { + case DisplayAlignment::RightOrBottom: + *out_left_padding = std::max(static_cast(window_width) - (display_width * scale), 0.0f); + break; + + case DisplayAlignment::Center: + *out_left_padding = + std::max((static_cast(window_width) - (display_width * scale)) / 2.0f, 0.0f); + break; + + case DisplayAlignment::LeftOrTop: + default: + *out_left_padding = 0.0f; + break; + } + } + + if (out_top_padding) + { + if (g_settings.display_integer_scaling) + *out_top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); + else + *out_top_padding = 0.0f; + } + } + + *out_width = active_width * scale; + *out_height = 
active_height * scale; + *out_left = active_left * scale; + *out_top = active_top * scale; + if (out_scale) + *out_scale = scale; +} + +std::tuple GPUDevice::CalculateDrawRect(s32 window_width, s32 window_height, + bool apply_aspect_ratio /* = true */) const +{ + float left, top, width, height, left_padding, top_padding; + CalculateDrawRect(window_width, window_height, &left, &top, &width, &height, &left_padding, &top_padding, nullptr, + nullptr, apply_aspect_ratio); + + return std::make_tuple(static_cast(left + left_padding), static_cast(top + top_padding), + static_cast(width), static_cast(height)); +} + +std::tuple GPUDevice::CalculateSoftwareCursorDrawRect() const +{ + return CalculateSoftwareCursorDrawRect(m_mouse_position_x, m_mouse_position_y); +} + +std::tuple GPUDevice::CalculateSoftwareCursorDrawRect(s32 cursor_x, s32 cursor_y) const +{ + const float scale = m_window_info.surface_scale * m_cursor_texture_scale; + const u32 cursor_extents_x = static_cast(static_cast(m_cursor_texture->GetWidth()) * scale * 0.5f); + const u32 cursor_extents_y = static_cast(static_cast(m_cursor_texture->GetHeight()) * scale * 0.5f); + + const s32 out_left = cursor_x - cursor_extents_x; + const s32 out_top = cursor_y - cursor_extents_y; + const s32 out_width = cursor_extents_x * 2u; + const s32 out_height = cursor_extents_y * 2u; + + return std::tie(out_left, out_top, out_width, out_height); +} + +std::tuple GPUDevice::ConvertWindowCoordinatesToDisplayCoordinates(s32 window_x, s32 window_y, + s32 window_width, + s32 window_height) const +{ + float left, top, width, height, left_padding, top_padding; + float scale, x_scale; + CalculateDrawRect(window_width, window_height, &left, &top, &width, &height, &left_padding, &top_padding, &scale, + &x_scale); + + // convert coordinates to active display region, then to full display region + const float scaled_display_x = static_cast(window_x) - left_padding; + const float scaled_display_y = static_cast(window_y) - top_padding; + + // 
scale back to internal resolution + const float display_x = scaled_display_x / scale / x_scale; + const float display_y = scaled_display_y / scale; + + return std::make_tuple(display_x, display_y); +} + +static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, + bool clear_alpha, bool flip_y, u32 resize_width, u32 resize_height, + std::vector texture_data, u32 texture_data_stride, + GPUTexture::Format texture_format) +{ + + const char* extension = std::strrchr(filename.c_str(), '.'); + if (!extension) + { + Log_ErrorPrintf("Unable to determine file extension for '%s'", filename.c_str()); + return false; + } + + if (!GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) + return false; + + if (clear_alpha) + { + for (u32& pixel : texture_data) + pixel |= 0xFF000000; + } + + if (flip_y) + GPUTexture::FlipTextureDataRGBA8(width, height, texture_data, texture_data_stride); + + if (resize_width > 0 && resize_height > 0 && (resize_width != width || resize_height != height)) + { + std::vector resized_texture_data(resize_width * resize_height); + u32 resized_texture_stride = sizeof(u32) * resize_width; + if (!stbir_resize_uint8(reinterpret_cast(texture_data.data()), width, height, texture_data_stride, + reinterpret_cast(resized_texture_data.data()), resize_width, resize_height, + resized_texture_stride, 4)) + { + Log_ErrorPrintf("Failed to resize texture data from %ux%u to %ux%u", width, height, resize_width, resize_height); + return false; + } + + width = resize_width; + height = resize_height; + texture_data = std::move(resized_texture_data); + texture_data_stride = resized_texture_stride; + } + + const auto write_func = [](void* context, void* data, int size) { + std::fwrite(data, 1, size, static_cast(context)); + }; + + bool result = false; + if (StringUtil::Strcasecmp(extension, ".png") == 0) + { + result = + (stbi_write_png_to_func(write_func, fp.get(), 
width, height, 4, texture_data.data(), texture_data_stride) != 0); + } + else if (StringUtil::Strcasecmp(extension, ".jpg") == 0) + { + result = (stbi_write_jpg_to_func(write_func, fp.get(), width, height, 4, texture_data.data(), 95) != 0); + } + else if (StringUtil::Strcasecmp(extension, ".tga") == 0) + { + result = (stbi_write_tga_to_func(write_func, fp.get(), width, height, 4, texture_data.data()) != 0); + } + else if (StringUtil::Strcasecmp(extension, ".bmp") == 0) + { + result = (stbi_write_bmp_to_func(write_func, fp.get(), width, height, 4, texture_data.data()) != 0); + } + + if (!result) + { + Log_ErrorPrintf("Unknown extension in filename '%s' or save error: '%s'", filename.c_str(), extension); + return false; + } + + return true; +} + +bool GPUDevice::WriteTextureToFile(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, std::string filename, + bool clear_alpha /* = true */, bool flip_y /* = false */, u32 resize_width /* = 0 */, + u32 resize_height /* = 0 */, bool compress_on_thread /* = false */) +{ + std::vector texture_data(width * height); + u32 texture_data_stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(texture->GetFormat()) * width, 4); + if (!DownloadTexture(texture, x, y, width, height, texture_data.data(), texture_data_stride)) + { + Log_ErrorPrintf("Texture download failed"); + return false; + } + + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); + if (!fp) + { + Log_ErrorPrintf("Can't open file '%s': errno %d", filename.c_str(), errno); + return false; + } + + if (!compress_on_thread) + { + return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), clear_alpha, flip_y, + resize_width, resize_height, std::move(texture_data), texture_data_stride, + texture->GetFormat()); + } + + std::thread compress_thread(CompressAndWriteTextureToFile, width, height, std::move(filename), std::move(fp), + clear_alpha, flip_y, resize_width, resize_height, std::move(texture_data), + texture_data_stride, 
texture->GetFormat()); + compress_thread.detach(); + return true; +} + +bool GPUDevice::WriteDisplayTextureToFile(std::string filename, bool full_resolution /* = true */, + bool apply_aspect_ratio /* = true */, bool compress_on_thread /* = false */) +{ + if (!m_display_texture) + return false; + + s32 resize_width = 0; + s32 resize_height = std::abs(m_display_texture_view_height); + if (apply_aspect_ratio) + { + const float ss_width_scale = static_cast(m_display_active_width) / static_cast(m_display_width); + const float ss_height_scale = static_cast(m_display_active_height) / static_cast(m_display_height); + const float ss_aspect_ratio = m_display_aspect_ratio * ss_width_scale / ss_height_scale; + resize_width = g_settings.display_stretch_vertically ? + m_display_texture_view_width : + static_cast(static_cast(resize_height) * ss_aspect_ratio); + resize_height = g_settings.display_stretch_vertically ? + static_cast(static_cast(resize_height) / + (m_display_aspect_ratio / + (static_cast(m_display_width) / static_cast(m_display_height)))) : + resize_height; + } + else + { + resize_width = m_display_texture_view_width; + } + + if (!full_resolution) + { + const s32 resolution_scale = std::abs(m_display_texture_view_height) / m_display_active_height; + resize_height /= resolution_scale; + resize_width /= resolution_scale; + } + + if (resize_width <= 0 || resize_height <= 0) + return false; + + const bool flip_y = (m_display_texture_view_height < 0); + s32 read_height = m_display_texture_view_height; + s32 read_y = m_display_texture_view_y; + if (flip_y) + { + read_height = -m_display_texture_view_height; + read_y = + (m_display_texture->GetHeight() - read_height) - (m_display_texture->GetHeight() - m_display_texture_view_y); + } + + return WriteTextureToFile(m_display_texture, m_display_texture_view_x, read_y, m_display_texture_view_width, + read_height, std::move(filename), true, flip_y, static_cast(resize_width), + static_cast(resize_height), compress_on_thread); +} + 
+bool GPUDevice::WriteDisplayTextureToBuffer(std::vector* buffer, u32 resize_width /* = 0 */, + u32 resize_height /* = 0 */, bool clear_alpha /* = true */) +{ + if (!m_display_texture) + return false; + + const bool flip_y = (m_display_texture_view_height < 0); + s32 read_width = m_display_texture_view_width; + s32 read_height = m_display_texture_view_height; + s32 read_x = m_display_texture_view_x; + s32 read_y = m_display_texture_view_y; + if (flip_y) + { + read_height = -m_display_texture_view_height; + read_y = + (m_display_texture->GetHeight() - read_height) - (m_display_texture->GetHeight() - m_display_texture_view_y); + } + + u32 width = static_cast(read_width); + u32 height = static_cast(read_height); + std::vector texture_data(width * height); + u32 texture_data_stride = Common::AlignUpPow2(m_display_texture->GetPixelSize() * width, 4); + if (!DownloadTexture(m_display_texture, read_x, read_y, width, height, texture_data.data(), texture_data_stride)) + { + Log_ErrorPrintf("Failed to download texture from GPU."); + return false; + } + + if (!GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, + m_display_texture->GetFormat())) + { + return false; + } + + if (clear_alpha) + { + for (u32& pixel : texture_data) + pixel |= 0xFF000000; + } + + if (flip_y) + { + std::vector temp(width); + for (u32 flip_row = 0; flip_row < (height / 2); flip_row++) + { + u32* top_ptr = &texture_data[flip_row * width]; + u32* bottom_ptr = &texture_data[((height - 1) - flip_row) * width]; + std::memcpy(temp.data(), top_ptr, texture_data_stride); + std::memcpy(top_ptr, bottom_ptr, texture_data_stride); + std::memcpy(bottom_ptr, temp.data(), texture_data_stride); + } + } + + if (resize_width > 0 && resize_height > 0 && (resize_width != width || resize_height != height)) + { + std::vector resized_texture_data(resize_width * resize_height); + u32 resized_texture_stride = sizeof(u32) * resize_width; + if 
(!stbir_resize_uint8(reinterpret_cast(texture_data.data()), width, height, texture_data_stride, + reinterpret_cast(resized_texture_data.data()), resize_width, resize_height, + resized_texture_stride, 4)) + { + Log_ErrorPrintf("Failed to resize texture data from %ux%u to %ux%u", width, height, resize_width, resize_height); + return false; + } + + width = resize_width; + height = resize_height; + *buffer = std::move(resized_texture_data); + texture_data_stride = resized_texture_stride; + } + else + { + *buffer = texture_data; + } + + return true; +} + +bool GPUDevice::WriteScreenshotToFile(std::string filename, bool internal_resolution /* = false */, + bool compress_on_thread /* = false */) +{ + u32 width = m_window_info.surface_width; + u32 height = m_window_info.surface_height; + auto [draw_left, draw_top, draw_width, draw_height] = CalculateDrawRect(width, height); + + if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) + { + // If internal res, scale the computed draw rectangle to the internal res. + // We re-use the draw rect because it's already been AR corrected. + const float sar = + static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); + const float dar = static_cast(draw_width) / static_cast(draw_height); + if (sar >= dar) + { + // stretch height, preserve width + const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); + width = m_display_texture_view_width; + height = static_cast(std::round(static_cast(draw_height) * scale)); + } + else + { + // stretch width, preserve height + const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); + width = static_cast(std::round(static_cast(draw_width) * scale)); + height = m_display_texture_view_height; + } + + // DX11 won't go past 16K texture size. 
+ constexpr u32 MAX_TEXTURE_SIZE = 16384; + if (width > MAX_TEXTURE_SIZE) + { + height = static_cast(static_cast(height) / + (static_cast(width) / static_cast(MAX_TEXTURE_SIZE))); + width = MAX_TEXTURE_SIZE; + } + if (height > MAX_TEXTURE_SIZE) + { + height = MAX_TEXTURE_SIZE; + width = static_cast(static_cast(width) / + (static_cast(height) / static_cast(MAX_TEXTURE_SIZE))); + } + + // Remove padding, it's not part of the framebuffer. + draw_left = 0; + draw_top = 0; + draw_width = static_cast(width); + draw_height = static_cast(height); + } + if (width == 0 || height == 0) + return false; + + std::vector pixels; + u32 pixels_stride; + GPUTexture::Format pixels_format; + if (!RenderScreenshot(width, height, + Common::Rectangle::FromExtents(draw_left, draw_top, draw_width, draw_height), &pixels, + &pixels_stride, &pixels_format)) + { + Log_ErrorPrintf("Failed to render %ux%u screenshot", width, height); + return false; + } + + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); + if (!fp) + { + Log_ErrorPrintf("Can't open file '%s': errno %d", filename.c_str(), errno); + return false; + } + + if (!compress_on_thread) + { + return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), true, UsesLowerLeftOrigin(), + width, height, std::move(pixels), pixels_stride, pixels_format); + } + + std::thread compress_thread(CompressAndWriteTextureToFile, width, height, std::move(filename), std::move(fp), true, + UsesLowerLeftOrigin(), width, height, std::move(pixels), pixels_stride, pixels_format); + compress_thread.detach(); + return true; +} + +std::unique_ptr GPUDevice::CreateDeviceForAPI(RenderAPI api) +{ + switch (api) + { +#ifdef WITH_VULKAN + case RenderAPI::Vulkan: + return std::make_unique(); +#endif + +#ifdef WITH_OPENGL + case RenderAPI::OpenGL: + case RenderAPI::OpenGLES: + return std::make_unique(); +#endif + +#ifdef _WIN32 + case RenderAPI::D3D12: + return std::make_unique(); + + case RenderAPI::D3D11: + return 
std::make_unique(); +#endif + +#ifdef __APPLE__ + case RenderAPI::Metal: + return WrapNewMetalDevice(); +#endif + + default: + return {}; + } +} diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h new file mode 100644 index 000000000..12f60ad04 --- /dev/null +++ b/src/util/gpu_device.h @@ -0,0 +1,803 @@ +// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_shader_cache.h" +#include "gpu_texture.h" +#include "window_info.h" + +#include "common/bitfield.h" +#include "common/heap_array.h" +#include "common/rectangle.h" +#include "common/types.h" + +#include "gsl/span" + +#include +#include +#include +#include +#include +#include + +enum class RenderAPI : u32 +{ + None, + D3D11, + D3D12, + Vulkan, + OpenGL, + OpenGLES, + Metal +}; + +class GPUFramebuffer +{ +public: + GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height); + virtual ~GPUFramebuffer(); + + ALWAYS_INLINE GPUTexture* GetRT() const { return m_rt; } + ALWAYS_INLINE GPUTexture* GetDS() const { return m_ds; } + + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } + + virtual void SetDebugName(const std::string_view& name) = 0; + +protected: + GPUTexture* m_rt; + GPUTexture* m_ds; + u32 m_width; + u32 m_height; +}; + +class GPUSampler +{ +public: + enum class Filter + { + Nearest, + Linear, + + MaxCount + }; + + enum class AddressMode + { + Repeat, + ClampToEdge, + ClampToBorder, + + MaxCount + }; + + union Config + { + static constexpr u8 LOD_MAX = 15; + + BitField min_filter; + BitField mag_filter; + BitField mip_filter; + BitField address_u; + BitField address_v; + BitField address_w; + BitField anisotropy; + BitField min_lod; + BitField max_lod; + BitField border_color; + u64 key; + + // clang-format off + ALWAYS_INLINE float GetBorderRed() const { return static_cast(border_color.GetValue() & 0xFF) / 255.0f; } + ALWAYS_INLINE float 
GetBorderGreen() const { return static_cast((border_color.GetValue() >> 8) & 0xFF) / 255.0f; } + ALWAYS_INLINE float GetBorderBlue() const { return static_cast((border_color.GetValue() >> 16) & 0xFF) / 255.0f; } + ALWAYS_INLINE float GetBorderAlpha() const { return static_cast((border_color.GetValue() >> 24) & 0xFF) / 255.0f; } + // clang-format on + ALWAYS_INLINE std::array GetBorderFloatColor() const + { + return std::array{GetBorderRed(), GetBorderGreen(), GetBorderBlue(), GetBorderAlpha()}; + } + }; + + GPUSampler(); + virtual ~GPUSampler(); + + virtual void SetDebugName(const std::string_view& name) = 0; + + static Config GetNearestConfig(); + static Config GetLinearConfig(); +}; + +enum class GPUShaderStage : u8 +{ + Vertex, + Fragment, + Compute, + + MaxCount +}; + +class GPUShader +{ +public: + GPUShader(GPUShaderStage stage); + virtual ~GPUShader(); + + static const char* GetStageName(GPUShaderStage stage); + + ALWAYS_INLINE GPUShaderStage GetStage() const { return m_stage; } + + virtual void SetDebugName(const std::string_view& name) = 0; + +protected: + GPUShaderStage m_stage; +}; + +class GPUPipeline +{ +public: + enum class Layout : u8 + { + // 1 streamed UBO, 1 texture in PS. + SingleTextureAndUBO, + + // 128 byte UBO via push constants, 1 texture. + SingleTextureAndPushConstants, + + // 128 byte UBO via push constants, 1 texture buffer/SSBO. + SingleTextureBufferAndPushConstants, + + // Multiple textures, 1 streamed UBO. + MultiTextureAndUBO, + + // Multiple textures, 128 byte UBO via push constants. 
+ MultiTextureAndPushConstants, + + MaxCount + }; + + enum class Primitive : u8 + { + Points, + Lines, + Triangles, + TriangleStrips, + + MaxCount + }; + + union VertexAttribute + { + static constexpr u32 MaxAttributes = 16; + + enum class Semantic : u8 + { + Position, + TexCoord, + Color, + + MaxCount + }; + + enum class Type : u8 + { + Float, + UInt8, + SInt8, + UNorm8, + UInt16, + SInt16, + UNorm16, + UInt32, + SInt32, + + MaxCount + }; + + BitField index; + BitField semantic; + BitField semantic_index; + BitField type; + BitField components; + BitField offset; + + u32 key; + + // clang-format off + ALWAYS_INLINE VertexAttribute& operator=(const VertexAttribute& rhs) { key = rhs.key; return *this; } + ALWAYS_INLINE bool operator==(const VertexAttribute& rhs) const { return key == rhs.key; } + ALWAYS_INLINE bool operator!=(const VertexAttribute& rhs) const { return key != rhs.key; } + ALWAYS_INLINE bool operator<(const VertexAttribute& rhs) const { return key < rhs.key; } + // clang-format on + + static constexpr VertexAttribute Make(u8 index, Semantic semantic, u8 semantic_index, Type type, u8 components, + u16 offset) + { + // Nasty :/ can't access an inactive element of a union here.. 
+ return VertexAttribute{{(static_cast(index) & 0xf) | ((static_cast(semantic) & 0x3) << 4) | + ((static_cast(semantic_index) & 0x3) << 6) | ((static_cast(type) & 0xf) << 8) | + ((static_cast(components) & 0x7) << 12) | + ((static_cast(offset) & 0xffff) << 16)}}; + } + }; + + struct InputLayout + { + gsl::span vertex_attributes; + u32 vertex_stride; + + bool operator==(const InputLayout& rhs) const; + bool operator!=(const InputLayout& rhs) const; + }; + + struct InputLayoutHash + { + size_t operator()(const InputLayout& il) const; + }; + + enum class CullMode : u8 + { + None, + Front, + Back, + + MaxCount + }; + + enum class DepthFunc : u8 + { + Never, + Always, + Less, + LessEqual, + Greater, + GreaterEqual, + Equal, + + MaxCount + }; + + enum class BlendFunc : u8 + { + Zero, + One, + SrcColor, + InvSrcColor, + DstColor, + InvDstColor, + SrcAlpha, + InvSrcAlpha, + SrcAlpha1, + InvSrcAlpha1, + DstAlpha, + InvDstAlpha, + ConstantColor, + InvConstantColor, + + MaxCount + }; + + enum class BlendOp : u8 + { + Add, + Subtract, + ReverseSubtract, + Min, + Max, + + MaxCount + }; + + // TODO: purge this? 
+ union RasterizationState + { + BitField cull_mode; + u8 key; + + // clang-format off + ALWAYS_INLINE RasterizationState& operator=(const RasterizationState& rhs) { key = rhs.key; return *this; } + ALWAYS_INLINE bool operator==(const RasterizationState& rhs) const { return key == rhs.key; } + ALWAYS_INLINE bool operator!=(const RasterizationState& rhs) const { return key != rhs.key; } + ALWAYS_INLINE bool operator<(const RasterizationState& rhs) const { return key < rhs.key; } + // clang-format on + + static RasterizationState GetNoCullState(); + }; + + union DepthState + { + BitField depth_test; + BitField depth_write; + u8 key; + + // clang-format off + ALWAYS_INLINE DepthState& operator=(const DepthState& rhs) { key = rhs.key; return *this; } + ALWAYS_INLINE bool operator==(const DepthState& rhs) const { return key == rhs.key; } + ALWAYS_INLINE bool operator!=(const DepthState& rhs) const { return key != rhs.key; } + ALWAYS_INLINE bool operator<(const DepthState& rhs) const { return key < rhs.key; } + // clang-format on + + static DepthState GetNoTestsState(); + static DepthState GetAlwaysWriteState(); + }; + + union BlendState + { + BitField enable; + BitField src_blend; + BitField src_alpha_blend; + BitField dst_blend; + BitField dst_alpha_blend; + BitField blend_op; + BitField alpha_blend_op; + BitField write_r; + BitField write_g; + BitField write_b; + BitField write_a; + BitField write_mask; + BitField constant; + u64 key; + + // clang-format off + ALWAYS_INLINE BlendState& operator=(const BlendState& rhs) { key = rhs.key; return *this; } + ALWAYS_INLINE bool operator==(const BlendState& rhs) const { return key == rhs.key; } + ALWAYS_INLINE bool operator!=(const BlendState& rhs) const { return key != rhs.key; } + ALWAYS_INLINE bool operator<(const BlendState& rhs) const { return key < rhs.key; } + // clang-format on + + // clang-format off + ALWAYS_INLINE float GetConstantRed() const { return static_cast(constant.GetValue() & 0xFF) / 255.0f; } + 
ALWAYS_INLINE float GetConstantGreen() const { return static_cast((constant.GetValue() >> 8) & 0xFF) / 255.0f; } + ALWAYS_INLINE float GetConstantBlue() const { return static_cast((constant.GetValue() >> 16) & 0xFF) / 255.0f; } + ALWAYS_INLINE float GetConstantAlpha() const { return static_cast((constant.GetValue() >> 24) & 0xFF) / 255.0f; } + // clang-format on + ALWAYS_INLINE std::array GetConstantFloatColor() const + { + return std::array{GetConstantRed(), GetConstantGreen(), GetConstantBlue(), GetConstantAlpha()}; + } + + static BlendState GetNoBlendingState(); + static BlendState GetAlphaBlendingState(); + }; + + struct GraphicsConfig + { + Layout layout; + + Primitive primitive; + InputLayout input_layout; + + RasterizationState rasterization; + DepthState depth; + BlendState blend; + + const GPUShader* vertex_shader; + const GPUShader* fragment_shader; + + GPUTexture::Format color_format; + GPUTexture::Format depth_format; + u32 samples; + bool per_sample_shading; + }; + + GPUPipeline(); + virtual ~GPUPipeline(); + + virtual void SetDebugName(const std::string_view& name) = 0; +}; + +class GPUTextureBuffer +{ +public: + enum class Format + { + R16UI, + + MaxCount + }; + + GPUTextureBuffer(Format format, u32 size_in_elements); + virtual ~GPUTextureBuffer(); + + static u32 GetElementSize(Format format); + + ALWAYS_INLINE Format GetFormat() const { return m_format; } + ALWAYS_INLINE u32 GetSizeInElements() const { return m_size_in_elements; } + ALWAYS_INLINE u32 GetSizeInBytes() const { return m_size_in_elements * GetElementSize(m_format); } + ALWAYS_INLINE u32 GetCurrentPosition() const { return m_current_position; } + + virtual void* Map(u32 required_elements) = 0; + virtual void Unmap(u32 used_elements) = 0; + + virtual void SetDebugName(const std::string_view& name) = 0; + +protected: + Format m_format; + u32 m_size_in_elements; + u32 m_current_position; +}; + +// TODO: remove +class PostProcessingChain; + +class GPUDevice +{ +public: + // TODO: drop 
virtuals + // TODO: gpu crash handling on present + using DrawIndex = u16; + + struct Features + { + bool dual_source_blend : 1; + bool per_sample_shading : 1; + bool noperspective_interpolation : 1; + bool supports_texture_buffers : 1; + bool texture_buffers_emulated_with_ssbo : 1; + bool partial_msaa_resolve : 1; + bool gpu_timing : 1; + bool shader_cache : 1; + bool pipeline_cache : 1; + }; + + struct AdapterAndModeList + { + std::vector adapter_names; + std::vector fullscreen_modes; + }; + + static constexpr u32 MAX_TEXTURE_SAMPLERS = 8; + + virtual ~GPUDevice(); + + /// Returns the default/preferred API for the system. + static RenderAPI GetPreferredAPI(); + + /// Returns a string representing the specified API. + static const char* RenderAPIToString(RenderAPI api); + + /// Returns a new device for the specified API. + static std::unique_ptr CreateDeviceForAPI(RenderAPI api); + + /// Parses a fullscreen mode into its components (width * height @ refresh hz) + static bool GetRequestedExclusiveFullscreenMode(u32* width, u32* height, float* refresh_rate); + + /// Converts a fullscreen mode to a string. + static std::string GetFullscreenModeString(u32 width, u32 height, float refresh_rate); + + /// Returns the directory bad shaders are saved to. + static std::string GetShaderDumpPath(const std::string_view& name); + + /// Converts a RGBA8 value to 4 floating-point values. + static std::array RGBA8ToFloat(u32 rgba); + + /// Returns the number of texture bindings for a given pipeline layout. 
+ static constexpr u32 GetActiveTexturesForLayout(GPUPipeline::Layout layout) + { + constexpr std::array(GPUPipeline::Layout::MaxCount)> counts = { + 1, // SingleTextureAndUBO + 1, // SingleTextureAndPushConstants + 0, // SingleTextureBufferAndPushConstants + MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO + MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants + }; + + return counts[static_cast(layout)]; + } + +#ifdef __APPLE__ + // We have to define these in the base class, because they're in Objective C++. + static std::unique_ptr WrapNewMetalDevice(); + static AdapterAndModeList WrapGetMetalAdapterAndModeList(); +#endif + + ALWAYS_INLINE const Features& GetFeatures() const { return m_features; } + ALWAYS_INLINE u32 GetMaxTextureSize() const { return m_max_texture_size; } + ALWAYS_INLINE u32 GetMaxMultisamples() const { return m_max_multisamples; } + + ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; } + ALWAYS_INLINE s32 GetWindowWidth() const { return static_cast(m_window_info.surface_width); } + ALWAYS_INLINE s32 GetWindowHeight() const { return static_cast(m_window_info.surface_height); } + ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; } + ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; } + + ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); } + ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); } + + // Position is relative to the top-left corner of the window. 
+ ALWAYS_INLINE s32 GetMousePositionX() const { return m_mouse_position_x; } + ALWAYS_INLINE s32 GetMousePositionY() const { return m_mouse_position_y; } + ALWAYS_INLINE void SetMousePosition(s32 x, s32 y) + { + m_mouse_position_x = x; + m_mouse_position_y = y; + } + + ALWAYS_INLINE const void* GetDisplayTextureHandle() const { return m_display_texture; } + ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; } + ALWAYS_INLINE s32 GetDisplayHeight() const { return m_display_height; } + ALWAYS_INLINE float GetDisplayAspectRatio() const { return m_display_aspect_ratio; } + ALWAYS_INLINE bool IsGPUTimingEnabled() const { return m_gpu_timing_enabled; } + + virtual RenderAPI GetRenderAPI() const = 0; + + bool Create(const std::string_view& adapter, const std::string_view& shader_cache_path, u32 shader_cache_version, + bool debug_device, bool vsync, bool threaded_presentation); + void Destroy(); + + virtual bool HasSurface() const = 0; + virtual void DestroySurface() = 0; + virtual bool UpdateWindow() = 0; + + virtual bool SupportsExclusiveFullscreen() const; + virtual AdapterAndModeList GetAdapterAndModeList() = 0; + + bool SetPostProcessingChain(const std::string_view& config); + + /// Call when the window size changes externally to recreate any resources. + virtual void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) = 0; + + virtual std::string GetDriverInfo() const = 0; + + /// Creates an abstracted RGBA8 texture. If dynamic, the texture can be updated with UpdateTexture() below. 
+ virtual std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) = 0; + virtual std::unique_ptr CreateSampler(const GPUSampler::Config& config) = 0; + virtual std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) = 0; + + virtual bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) = 0; + virtual void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) = 0; + virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) = 0; + virtual void ClearRenderTarget(GPUTexture* t, u32 c); + virtual void ClearDepth(GPUTexture* t, float d); + virtual void InvalidateRenderTarget(GPUTexture* t); + + /// Framebuffer abstraction. + virtual std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) = 0; + + /// Shader abstraction. + std::unique_ptr CreateShader(GPUShaderStage stage, const std::string_view& source, + const char* entry_point = "main"); + virtual std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) = 0; + + /// Debug messaging. + virtual void PushDebugGroup(const char* fmt, ...) = 0; + virtual void PopDebugGroup() = 0; + virtual void InsertDebugMessage(const char* fmt, ...) = 0; + + /// Vertex/index buffer abstraction. 
+ virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) = 0; + virtual void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) = 0; + virtual void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) = 0; + virtual void UnmapIndexBuffer(u32 used_size) = 0; + + void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex); + void UploadIndexBuffer(const DrawIndex* indices, u32 index_count, u32* base_index); + + /// Uniform buffer abstraction. + virtual void PushUniformBuffer(const void* data, u32 data_size) = 0; + virtual void* MapUniformBuffer(u32 size) = 0; + virtual void UnmapUniformBuffer(u32 size) = 0; + void UploadUniformBuffer(const void* data, u32 data_size); + + /// Drawing setup abstraction. + virtual void SetFramebuffer(GPUFramebuffer* fb) = 0; + virtual void SetPipeline(GPUPipeline* pipeline) = 0; + virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0; + virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0; + virtual void SetViewport(s32 x, s32 y, s32 width, s32 height) = 0; // TODO: Rectangle + virtual void SetScissor(s32 x, s32 y, s32 width, s32 height) = 0; + void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height); + + // Drawing abstraction. + virtual void Draw(u32 vertex_count, u32 base_vertex) = 0; + virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0; + + /// Returns false if the window was completely occluded. + virtual bool BeginPresent(bool skip_present) = 0; + virtual void EndPresent() = 0; + bool Render(bool skip_present); + + /// Renders the display with postprocessing to the specified image. 
+ bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, + u32* out_stride, GPUTexture::Format* out_format); + + ALWAYS_INLINE bool IsVsyncEnabled() const { return m_vsync_enabled; } + virtual void SetVSync(bool enabled) = 0; + + ALWAYS_INLINE bool IsDebugDevice() const { return m_debug_device; } + + bool UpdateImGuiFontTexture(); + bool UsesLowerLeftOrigin() const; + void SetDisplayMaxFPS(float max_fps); + bool ShouldSkipDisplayingFrame(); + void ThrottlePresentation(); + + void ClearDisplayTexture(); + void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height); + void SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height); + void SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, s32 active_width, + s32 active_height, float display_aspect_ratio); + + virtual bool SupportsTextureFormat(GPUTexture::Format format) const = 0; + + virtual bool GetHostRefreshRate(float* refresh_rate); + + /// Enables/disables GPU frame timing. + virtual bool SetGPUTimingEnabled(bool enabled); + + /// Returns the amount of GPU time utilized since the last time this method was called. + virtual float GetAndResetAccumulatedGPUTime(); + + /// Sets the software cursor to the specified texture. Ownership of the texture is transferred. + void SetSoftwareCursor(std::unique_ptr texture, float scale = 1.0f); + + /// Sets the software cursor to the specified image. + bool SetSoftwareCursor(const void* pixels, u32 width, u32 height, u32 stride, float scale = 1.0f); + + /// Sets the software cursor to the specified path (png image). + bool SetSoftwareCursor(const char* path, float scale = 1.0f); + + /// Disables the software cursor. + void ClearSoftwareCursor(); + + /// Helper function for computing the draw rectangle in a larger window. 
+ std::tuple CalculateDrawRect(s32 window_width, s32 window_height, + bool apply_aspect_ratio = true) const; + + /// Helper function for converting window coordinates to display coordinates. + std::tuple ConvertWindowCoordinatesToDisplayCoordinates(s32 window_x, s32 window_y, s32 window_width, + s32 window_height) const; + + /// Helper function to save texture data to a PNG. If flip_y is set, the image will be flipped aka OpenGL. + bool WriteTextureToFile(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, std::string filename, + bool clear_alpha = true, bool flip_y = false, u32 resize_width = 0, u32 resize_height = 0, + bool compress_on_thread = false); + + /// Helper function to save current display texture to PNG. + bool WriteDisplayTextureToFile(std::string filename, bool full_resolution = true, bool apply_aspect_ratio = true, + bool compress_on_thread = false); + + /// Helper function to save current display texture to a buffer. + bool WriteDisplayTextureToBuffer(std::vector* buffer, u32 resize_width = 0, u32 resize_height = 0, + bool clear_alpha = true); + + /// Helper function to save screenshot to PNG. 
+ bool WriteScreenshotToFile(std::string filename, bool internal_resolution = false, bool compress_on_thread = false); + +protected: + virtual bool CreateDevice(const std::string_view& adapter, bool threaded_presentation) = 0; + virtual void DestroyDevice() = 0; + + std::string GetShaderCacheBaseName(const std::string_view& type) const; + virtual bool ReadPipelineCache(const std::string& filename); + virtual bool GetPipelineCacheData(DynamicHeapArray* data); + + virtual std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) = 0; + virtual std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, + DynamicHeapArray* out_binary) = 0; + + bool AcquireWindow(bool recreate_window); + + Features m_features = {}; + u32 m_max_texture_size = 0; + u32 m_max_multisamples = 0; + + WindowInfo m_window_info; + + GPUShaderCache m_shader_cache; + + std::unique_ptr m_nearest_sampler; + std::unique_ptr m_linear_sampler; + + bool m_gpu_timing_enabled = false; + bool m_vsync_enabled = false; + bool m_debug_device = false; + +private: + ALWAYS_INLINE bool HasSoftwareCursor() const { return static_cast(m_cursor_texture); } + ALWAYS_INLINE bool HasDisplayTexture() const { return (m_display_texture != nullptr); } + + void OpenShaderCache(const std::string_view& base_path, u32 version); + void CloseShaderCache(); + bool CreateResources(); + void DestroyResources(); + + bool IsUsingLinearFiltering() const; + + void CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, float* out_width, + float* out_height, float* out_left_padding, float* out_top_padding, float* out_scale, + float* out_x_scale, bool apply_aspect_ratio = true) const; + + std::tuple CalculateSoftwareCursorDrawRect() const; + std::tuple CalculateSoftwareCursorDrawRect(s32 cursor_x, s32 cursor_y) const; + + void RenderImGui(); + + void RenderSoftwareCursor(); + + bool RenderDisplay(GPUFramebuffer* target, s32 
left, s32 top, s32 width, s32 height, GPUTexture* texture, + s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, + bool linear_filter); + void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture); + + u64 m_last_frame_displayed_time = 0; + + s32 m_mouse_position_x = 0; + s32 m_mouse_position_y = 0; + + s32 m_display_width = 0; + s32 m_display_height = 0; + s32 m_display_active_left = 0; + s32 m_display_active_top = 0; + s32 m_display_active_width = 0; + s32 m_display_active_height = 0; + float m_display_aspect_ratio = 1.0f; + float m_display_frame_interval = 0.0f; + + std::unique_ptr m_display_pipeline; + GPUTexture* m_display_texture = nullptr; + s32 m_display_texture_view_x = 0; + s32 m_display_texture_view_y = 0; + s32 m_display_texture_view_width = 0; + s32 m_display_texture_view_height = 0; + + std::unique_ptr m_imgui_pipeline; + std::unique_ptr m_imgui_font_texture; + + std::unique_ptr m_cursor_pipeline; + std::unique_ptr m_cursor_texture; + float m_cursor_texture_scale = 1.0f; + + bool m_display_changed = false; + + std::unique_ptr m_post_processing_chain; +}; + +extern std::unique_ptr g_gpu_device; + +namespace Host { +/// Called when the core is creating a render device. +/// This could also be fullscreen transition. +std::optional AcquireRenderWindow(bool recreate_window); + +/// Called when the core is finished with a render window. +void ReleaseRenderWindow(); + +/// Returns true if the hosting application is currently fullscreen. +bool IsFullscreen(); + +/// Alters fullscreen state of hosting application. +void SetFullscreen(bool enabled); +} // namespace Host + +// Macros for debug messages. +#ifdef _DEBUG +struct GLAutoPop +{ + GLAutoPop(int dummy) {} + ~GLAutoPop() { g_gpu_device->PopDebugGroup(); } +}; + +#define GL_SCOPE(...) GLAutoPop gl_auto_pop((g_gpu_device->PushDebugGroup(__VA_ARGS__), 0)) +#define GL_PUSH(...) 
g_gpu_device->PushDebugGroup(__VA_ARGS__) +#define GL_POP() g_gpu_device->PopDebugGroup() +#define GL_INS(...) g_gpu_device->InsertDebugMessage(__VA_ARGS__) +#define GL_OBJECT_NAME(obj, ...) (obj)->SetDebugName(StringUtil::StdStringFromFormat(__VA_ARGS__)) +#else +#define GL_SCOPE(...) (void)0 +#define GL_PUSH(...) (void)0 +#define GL_POP() (void)0 +#define GL_INS(...) (void)0 +#define GL_OBJECT_NAME(...) (void)0 +#endif diff --git a/src/util/gpu_shader_cache.cpp b/src/util/gpu_shader_cache.cpp new file mode 100644 index 000000000..5638acf53 --- /dev/null +++ b/src/util/gpu_shader_cache.cpp @@ -0,0 +1,320 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gpu_shader_cache.h" +#include "gpu_device.h" + +#include "common/file_system.h" +#include "common/heap_array.h" +#include "common/log.h" +#include "common/md5_digest.h" + +#include "fmt/format.h" + +#include "zstd.h" +#include "zstd_errors.h" + +Log_SetChannel(GPUShaderCache); + +#pragma pack(push, 1) +struct CacheIndexEntry +{ + u32 shader_type; + u32 source_length; + u64 source_hash_low; + u64 source_hash_high; + u64 entry_point_low; + u64 entry_point_high; + u32 file_offset; + u32 compressed_size; + u32 uncompressed_size; +}; +#pragma pack(pop) + +GPUShaderCache::GPUShaderCache() = default; + +GPUShaderCache::~GPUShaderCache() +{ + Close(); +} + +bool GPUShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const +{ + return (std::memcmp(this, &key, sizeof(*this)) == 0); +} + +bool GPUShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const +{ + return (std::memcmp(this, &key, sizeof(*this)) != 0); +} + +std::size_t GPUShaderCache::CacheIndexEntryHash::operator()(const CacheIndexKey& e) const noexcept +{ + std::size_t h = 0; + hash_combine(h, e.entry_point_low, e.entry_point_high, e.source_hash_low, e.source_hash_high, e.source_length, + e.shader_type); + return h; +} + +bool GPUShaderCache::Open(const 
std::string_view& base_filename, u32 version) +{ + m_base_filename = base_filename; + m_version = version; + + if (base_filename.empty()) + return true; + + const std::string index_filename = fmt::format("{}.idx", m_base_filename); + const std::string blob_filename = fmt::format("{}.bin", m_base_filename); + return ReadExisting(index_filename, blob_filename); +} + +bool GPUShaderCache::Create() +{ + const std::string index_filename = fmt::format("{}.idx", m_base_filename); + const std::string blob_filename = fmt::format("{}.bin", m_base_filename); + return CreateNew(index_filename, blob_filename); +} + +void GPUShaderCache::Close() +{ + if (m_index_file) + { + std::fclose(m_index_file); + m_index_file = nullptr; + } + if (m_blob_file) + { + std::fclose(m_blob_file); + m_blob_file = nullptr; + } +} + +void GPUShaderCache::Clear() +{ + if (!IsOpen()) + return; + + Close(); + + Log_WarningPrintf("Clearing shader cache at %s.", m_base_filename.c_str()); + + const std::string index_filename = fmt::format("{}.idx", m_base_filename); + const std::string blob_filename = fmt::format("{}.bin", m_base_filename); + CreateNew(index_filename, blob_filename); +} + +bool GPUShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename) +{ + if (FileSystem::FileExists(index_filename.c_str())) + { + Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); + FileSystem::DeleteFile(index_filename.c_str()); + } + if (FileSystem::FileExists(blob_filename.c_str())) + { + Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); + FileSystem::DeleteFile(blob_filename.c_str()); + } + + m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); + if (!m_index_file) + { + Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str()); + return false; + } + + if (std::fwrite(&m_version, sizeof(m_version), 1, m_index_file) != 1) + { + Log_ErrorPrintf("Failed to write version to index file 
'%s'", index_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + FileSystem::DeleteFile(index_filename.c_str()); + return false; + } + + m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b"); + if (!m_blob_file) + { + Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + FileSystem::DeleteFile(index_filename.c_str()); + return false; + } + + return true; +} + +bool GPUShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename) +{ + m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b"); + if (!m_index_file) + { + // special case here: when there's a sharing violation (i.e. two instances running), + // we don't want to blow away the cache. so just continue without a cache. + if (errno == EACCES) + { + Log_WarningPrintf("Failed to open shader cache index with EACCES, are you running two instances?"); + return true; + } + + return false; + } + + u32 file_version = 0; + if (std::fread(&file_version, sizeof(file_version), 1, m_index_file) != 1 || file_version != m_version) + { + Log_ErrorPrintf("Bad file/data version in '%s'", index_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + return false; + } + + m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b"); + if (!m_blob_file) + { + Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + return false; + } + + std::fseek(m_blob_file, 0, SEEK_END); + const u32 blob_file_size = static_cast(std::ftell(m_blob_file)); + + for (;;) + { + CacheIndexEntry entry; + if (std::fread(&entry, sizeof(entry), 1, m_index_file) != 1 || + (entry.file_offset + entry.compressed_size) > blob_file_size) + { + if (std::feof(m_index_file)) + break; + + Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str()); + m_index.clear(); + 
std::fclose(m_blob_file); + m_blob_file = nullptr; + std::fclose(m_index_file); + m_index_file = nullptr; + return false; + } + + const CacheIndexKey key{entry.shader_type, entry.source_length, entry.source_hash_low, + entry.source_hash_high, entry.entry_point_low, entry.entry_point_high}; + const CacheIndexData data{entry.file_offset, entry.compressed_size, entry.uncompressed_size}; + m_index.emplace(key, data); + } + + // ensure we don't write before seeking + std::fseek(m_index_file, 0, SEEK_END); + + Log_DevPrintf("Read %zu entries from '%s'", m_index.size(), index_filename.c_str()); + return true; +} + +GPUShaderCache::CacheIndexKey GPUShaderCache::GetCacheKey(GPUShaderStage stage, const std::string_view& shader_code, + const std::string_view& entry_point) +{ + union + { + struct + { + u64 hash_low; + u64 hash_high; + }; + u8 hash[16]; + } h; + + CacheIndexKey key = {}; + key.shader_type = static_cast(stage); + + MD5Digest digest; + digest.Update(shader_code.data(), static_cast(shader_code.length())); + digest.Final(h.hash); + key.source_hash_low = h.hash_low; + key.source_hash_high = h.hash_high; + key.source_length = static_cast(shader_code.length()); + + digest.Reset(); + digest.Update(entry_point.data(), static_cast(entry_point.length())); + digest.Final(h.hash); + key.entry_point_low = h.hash_low; + key.entry_point_high = h.hash_high; + + return key; +} + +bool GPUShaderCache::Lookup(const CacheIndexKey& key, ShaderBinary* binary) +{ + auto iter = m_index.find(key); + if (iter == m_index.end()) + return false; + + binary->resize(iter->second.uncompressed_size); + + DynamicHeapArray compressed_data(iter->second.compressed_size); + + if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 || + std::fread(compressed_data.data(), iter->second.compressed_size, 1, m_blob_file) != 1) + { + Log_ErrorPrintf("Read %u byte %s shader from file failed", iter->second.compressed_size, + GPUShader::GetStageName(static_cast(key.shader_type))); + return false; 
+ } + + const size_t decompress_result = + ZSTD_decompress(binary->data(), binary->size(), compressed_data.data(), compressed_data.size()); + if (ZSTD_isError(decompress_result)) + { + Log_ErrorPrintf("Failed to decompress shader: %s", ZSTD_getErrorName(decompress_result)); + return false; + } + + return true; +} + +bool GPUShaderCache::Insert(const CacheIndexKey& key, const void* data, u32 data_size) +{ + DynamicHeapArray compress_buffer(ZSTD_compressBound(data_size)); + const size_t compress_result = ZSTD_compress(compress_buffer.data(), compress_buffer.size(), data, data_size, 0); + if (ZSTD_isError(compress_result)) + { + Log_ErrorPrintf("Failed to compress shader: %s", ZSTD_getErrorName(compress_result)); + return false; + } + + if (!m_blob_file || std::fseek(m_blob_file, 0, SEEK_END) != 0) + return false; + + CacheIndexData idata; + idata.file_offset = static_cast(std::ftell(m_blob_file)); + idata.compressed_size = static_cast(compress_result); + idata.uncompressed_size = data_size; + + CacheIndexEntry entry = {}; + entry.shader_type = static_cast(key.shader_type); + entry.source_length = key.source_length; + entry.source_hash_low = key.source_hash_low; + entry.source_hash_high = key.source_hash_high; + entry.entry_point_low = key.entry_point_low; + entry.entry_point_high = key.entry_point_high; + entry.file_offset = idata.file_offset; + entry.compressed_size = idata.compressed_size; + entry.uncompressed_size = idata.uncompressed_size; + + if (std::fwrite(compress_buffer.data(), compress_result, 1, m_blob_file) != 1 || std::fflush(m_blob_file) != 0 || + std::fwrite(&entry, sizeof(entry), 1, m_index_file) != 1 || std::fflush(m_index_file) != 0) + { + Log_ErrorPrintf("Failed to write %u byte %s shader blob to file", data_size, + GPUShader::GetStageName(static_cast(key.shader_type))); + return false; + } + + Log_DevPrintf("Cached compressed %s shader: %u -> %u bytes", + GPUShader::GetStageName(static_cast(key.shader_type)), data_size, + 
static_cast(compress_result)); + m_index.emplace(key, idata); + return true; +} diff --git a/src/util/gpu_shader_cache.h b/src/util/gpu_shader_cache.h new file mode 100644 index 000000000..032aea29b --- /dev/null +++ b/src/util/gpu_shader_cache.h @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "common/hash_combine.h" +#include "common/heap_array.h" +#include "common/types.h" + +#include +#include +#include +#include + +enum class GPUShaderStage : u8; + +class GPUShaderCache +{ +public: + using ShaderBinary = DynamicHeapArray; + + struct alignas(8) CacheIndexKey + { + u32 shader_type; + u32 source_length; + u64 source_hash_low; + u64 source_hash_high; + u64 entry_point_low; + u64 entry_point_high; + + bool operator==(const CacheIndexKey& key) const; + bool operator!=(const CacheIndexKey& key) const; + }; + static_assert(sizeof(CacheIndexKey) == 40, "Cache key has no padding"); + + struct CacheIndexEntryHash + { + std::size_t operator()(const CacheIndexKey& e) const noexcept; + }; + + GPUShaderCache(); + ~GPUShaderCache(); + + ALWAYS_INLINE const std::string& GetBaseFilename() const { return m_base_filename; } + ALWAYS_INLINE u32 GetVersion() const { return m_version; } + + bool IsOpen() const { return (m_index_file != nullptr); } + + bool Open(const std::string_view& base_filename, u32 version); + bool Create(); + void Close(); + + static CacheIndexKey GetCacheKey(GPUShaderStage stage, const std::string_view& shader_code, + const std::string_view& entry_point); + + bool Lookup(const CacheIndexKey& key, ShaderBinary* binary); + bool Insert(const CacheIndexKey& key, const void* data, u32 data_size); + void Clear(); + +private: + struct CacheIndexData + { + u32 file_offset; + u32 compressed_size; + u32 uncompressed_size; + }; + + using CacheIndex = std::unordered_map; + + bool CreateNew(const std::string& index_filename, const std::string& blob_filename); + bool 
ReadExisting(const std::string& index_filename, const std::string& blob_filename); + + CacheIndex m_index; + + std::string m_base_filename; + u32 m_version; + + std::FILE* m_index_file = nullptr; + std::FILE* m_blob_file = nullptr; +}; diff --git a/src/common/gpu_texture.cpp b/src/util/gpu_texture.cpp similarity index 60% rename from src/common/gpu_texture.cpp rename to src/util/gpu_texture.cpp index 7744ff8b9..a08ae9b5c 100644 --- a/src/common/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -1,20 +1,52 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_texture.h" -#include "log.h" -#include "string_util.h" +#include "gpu_device.h" + +#include "common/log.h" +#include "common/string_util.h" + Log_SetChannel(GPUTexture); GPUTexture::GPUTexture() = default; -GPUTexture::GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, GPUTexture::Format format) - : m_width(width), m_height(height), m_layers(layers), m_levels(levels), m_samples(samples), m_format(format) +GPUTexture::GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format) + : m_width(width), m_height(height), m_layers(layers), m_levels(levels), m_samples(samples), m_type(type), + m_format(format) { } GPUTexture::~GPUTexture() = default; +const char* GPUTexture::GetFormatName(Format format) +{ + static constexpr const char* format_names[static_cast(Format::MaxCount)] = { + "Unknown", // Unknown + "RGBA8", // RGBA8 + "BGRA8", // BGRA8 + "RGB565", // RGB565 + "RGB5551", // RGBA5551 + "R8", // R8 + "D16", // D16 + "R16", // R16 + "R16F", // R16F + "R32I", // R32I + "R32U", // R32U + "R32F", // R32F + "RG8", // RG8 + "RG16", // RG16 + "RG16F", // RG16F + "RG32F", // RG32F + "RGBA16", // RGBA16 + "RGBA16F", // RGBA16F + "RGBA32F", // RGBA32F + "RGB10A2", // RGB10A2 + }; + + return format_names[static_cast(format)]; +} + void 
GPUTexture::ClearBaseProperties() { m_width = 0; @@ -22,10 +54,17 @@ void GPUTexture::ClearBaseProperties() m_layers = 0; m_levels = 0; m_samples = 0; + m_type = GPUTexture::Type::Unknown; m_format = GPUTexture::Format::Unknown; + m_state = State::Dirty; } -u32 GPUTexture::GPUTexture::GetPixelSize(GPUTexture::Format format) +std::array GPUTexture::GetUNormClearColor() const +{ + return GPUDevice::RGBA8ToFloat(m_clear_value.color); +} + +u32 GPUTexture::GetPixelSize(GPUTexture::Format format) { switch (format) { @@ -51,12 +90,56 @@ bool GPUTexture::IsDepthFormat(Format format) return (format == Format::D16); } +bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format) +{ + if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES) + { + Log_ErrorPrintf("Invalid dimensions: %ux%ux%u %u %u.", width, height, layers, levels, samples); + return false; + } + + const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); + if (width > max_texture_size || height > max_texture_size) + { + Log_ErrorPrintf("Texture width (%u) or height (%u) exceeds max texture size (%u).", width, height, + max_texture_size); + return false; + } + + const u32 max_samples = g_gpu_device->GetMaxMultisamples(); + if (samples > max_samples) + { + Log_ErrorPrintf("Texture samples (%u) exceeds max samples (%u).", samples, max_samples); + return false; + } + + if (samples > 1 && levels > 1) + { + Log_ErrorPrintf("Multisampled textures can't have mip levels."); + return false; + } + + if (layers > 1 && type != Type::Texture) + { + Log_ErrorPrintf("Texture arrays are not supported on targets."); + return false; + } + + if (levels > 1 && type != Type::Texture) + { + Log_ErrorPrintf("Mipmaps are not supported on targets."); + return false; + } + + return true; +} + bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, u32& texture_data_stride, 
GPUTexture::Format format) { switch (format) { - case GPUTexture::Format::BGRA8: + case Format::BGRA8: { for (u32 y = 0; y < height; y++) { @@ -68,10 +151,10 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector temp(width * height); @@ -99,7 +182,7 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector temp(width * height); @@ -145,4 +228,8 @@ void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector& t std::memcpy(top_ptr, bottom_ptr, texture_data_stride); std::memcpy(bottom_ptr, temp.data(), texture_data_stride); } -} \ No newline at end of file +} + +void GPUTexture::MakeReadyForSampling() +{ +} diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h new file mode 100644 index 000000000..c4db7a267 --- /dev/null +++ b/src/util/gpu_texture.h @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "common/types.h" + +#include +#include +#include +#include + +class GPUTexture +{ +public: + enum : u32 + { + MAX_WIDTH = 65535, + MAX_HEIGHT = 65535, + MAX_LAYERS = 255, + MAX_LEVELS = 255, + MAX_SAMPLES = 255, + }; + + enum class Type : u8 + { + Unknown, + RenderTarget, + DepthStencil, + Texture, + RWTexture, + }; + + enum class Format : u8 + { + Unknown, + RGBA8, + BGRA8, + RGB565, + RGBA5551, + R8, + D16, + R16, + R16F, + R32I, + R32U, + R32F, + RG8, + RG16, + RG16F, + RG32F, + RGBA16, + RGBA16F, + RGBA32F, + RGB10A2, + MaxCount + }; + + enum class State : u8 + { + Dirty, + Cleared, + Invalidated + }; + + union ClearValue + { + u32 color; + float depth; + }; + +public: + virtual ~GPUTexture(); + + static const char* GetFormatName(Format format); + + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } + ALWAYS_INLINE u32 GetLayers() const { return m_layers; } + ALWAYS_INLINE u32 GetLevels() const { return m_levels; } + ALWAYS_INLINE 
u32 GetSamples() const { return m_samples; } + ALWAYS_INLINE Type GetType() const { return m_type; } + ALWAYS_INLINE Format GetFormat() const { return m_format; } + + ALWAYS_INLINE bool IsTextureArray() const { return m_layers > 1; } + ALWAYS_INLINE bool IsMultisampled() const { return m_samples > 1; } + + ALWAYS_INLINE u32 GetPixelSize() const { return GetPixelSize(m_format); } + ALWAYS_INLINE u32 GetMipWidth(u32 level) const { return std::max(m_width >> level, 1u); } + ALWAYS_INLINE u32 GetMipHeight(u32 level) const { return std::max(m_height >> level, 1u); } + + ALWAYS_INLINE State GetState() const { return m_state; } + ALWAYS_INLINE void SetState(State state) { m_state = state; } + + ALWAYS_INLINE bool IsRenderTargetOrDepthStencil() const + { + return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil); + } + ALWAYS_INLINE bool IsRenderTarget() const { return (m_type == Type::RenderTarget); } + ALWAYS_INLINE bool IsDepthStencil() const { return (m_type == Type::DepthStencil); } + ALWAYS_INLINE bool IsTexture() const { return (m_type == Type::Texture); } + + ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; } + ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; } + ALWAYS_INLINE float GetClearDepth() const { return m_clear_value.depth; } + std::array GetUNormClearColor() const; + + ALWAYS_INLINE void SetClearColor(u32 color) + { + m_state = State::Cleared; + m_clear_value.color = color; + } + ALWAYS_INLINE void SetClearDepth(float depth) + { + m_state = State::Cleared; + m_clear_value.depth = depth; + } + + static u32 GetPixelSize(GPUTexture::Format format); + static bool IsDepthFormat(GPUTexture::Format format); + static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format); + + static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, u32& texture_data_stride, + GPUTexture::Format format); + static void 
FlipTextureDataRGBA8(u32 width, u32 height, std::vector& texture_data, u32 texture_data_stride); + + virtual bool IsValid() const = 0; + + virtual bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, + u32 level = 0) = 0; + virtual bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) = 0; + virtual void Unmap() = 0; + + // Instructs the backend that we're finished rendering to this texture. It may transition it to a new layout. + virtual void MakeReadyForSampling(); + + virtual void SetDebugName(const std::string_view& name) = 0; + +protected: + GPUTexture(); + GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format); + + void ClearBaseProperties(); + + u16 m_width = 0; + u16 m_height = 0; + u8 m_layers = 0; + u8 m_levels = 0; + u8 m_samples = 0; + Type m_type = Type::Unknown; + Format m_format = Format::Unknown; + State m_state = State::Dirty; + + ClearValue m_clear_value = {}; +}; diff --git a/src/util/host_display.cpp b/src/util/host_display.cpp deleted file mode 100644 index e573bd090..000000000 --- a/src/util/host_display.cpp +++ /dev/null @@ -1,702 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "host_display.h" -#include "common/align.h" -#include "common/assert.h" -#include "common/file_system.h" -#include "common/log.h" -#include "common/string_util.h" -#include "common/timer.h" -#include "core/settings.h" // TODO FIXME -#include "stb_image.h" -#include "stb_image_resize.h" -#include "stb_image_write.h" -#include -#include -#include -#include -#include -Log_SetChannel(HostDisplay); - -std::unique_ptr g_host_display; - -HostDisplay::~HostDisplay() = default; - -RenderAPI HostDisplay::GetPreferredAPI() -{ -#ifdef _WIN32 - return RenderAPI::D3D11; -#else - return RenderAPI::OpenGL; -#endif -} - -void HostDisplay::DestroyResources() -{ - 
m_cursor_texture.reset(); -} - -bool HostDisplay::UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) -{ - void* map_ptr; - u32 map_pitch; - if (!BeginTextureUpdate(texture, width, height, &map_ptr, &map_pitch)) - return false; - - StringUtil::StrideMemCpy(map_ptr, map_pitch, data, pitch, std::min(pitch, map_pitch), height); - EndTextureUpdate(texture, x, y, width, height); - return true; -} - -bool HostDisplay::ParseFullscreenMode(const std::string_view& mode, u32* width, u32* height, float* refresh_rate) -{ - if (!mode.empty()) - { - std::string_view::size_type sep1 = mode.find('x'); - if (sep1 != std::string_view::npos) - { - std::optional owidth = StringUtil::FromChars(mode.substr(0, sep1)); - sep1++; - - while (sep1 < mode.length() && std::isspace(mode[sep1])) - sep1++; - - if (owidth.has_value() && sep1 < mode.length()) - { - std::string_view::size_type sep2 = mode.find('@', sep1); - if (sep2 != std::string_view::npos) - { - std::optional oheight = StringUtil::FromChars(mode.substr(sep1, sep2 - sep1)); - sep2++; - - while (sep2 < mode.length() && std::isspace(mode[sep2])) - sep2++; - - if (oheight.has_value() && sep2 < mode.length()) - { - std::optional orefresh_rate = StringUtil::FromChars(mode.substr(sep2)); - if (orefresh_rate.has_value()) - { - *width = owidth.value(); - *height = oheight.value(); - *refresh_rate = orefresh_rate.value(); - return true; - } - } - } - } - } - } - - *width = 0; - *height = 0; - *refresh_rate = 0; - return false; -} - -std::string HostDisplay::GetFullscreenModeString(u32 width, u32 height, float refresh_rate) -{ - return StringUtil::StdStringFromFormat("%u x %u @ %f hz", width, height, refresh_rate); -} - -bool HostDisplay::UsesLowerLeftOrigin() const -{ - const RenderAPI api = GetRenderAPI(); - return (api == RenderAPI::OpenGL || api == RenderAPI::OpenGLES); -} - -void HostDisplay::SetDisplayMaxFPS(float max_fps) -{ - m_display_frame_interval = (max_fps > 0.0f) ? 
(1.0f / max_fps) : 0.0f; -} - -bool HostDisplay::ShouldSkipDisplayingFrame() -{ - if (m_display_frame_interval == 0.0f) - return false; - - const u64 now = Common::Timer::GetCurrentValue(); - const double diff = Common::Timer::ConvertValueToSeconds(now - m_last_frame_displayed_time); - if (diff < m_display_frame_interval) - return true; - - m_last_frame_displayed_time = now; - return false; -} - -void HostDisplay::ThrottlePresentation() -{ - const float throttle_rate = (m_window_info.surface_refresh_rate > 0.0f) ? m_window_info.surface_refresh_rate : 60.0f; - - const u64 sleep_period = Common::Timer::ConvertNanosecondsToValue(1e+9f / static_cast(throttle_rate)); - const u64 current_ts = Common::Timer::GetCurrentValue(); - - // Allow it to fall behind/run ahead up to 2*period. Sleep isn't that precise, plus we need to - // allow time for the actual rendering. - const u64 max_variance = sleep_period * 2; - if (static_cast(std::abs(static_cast(current_ts - m_last_frame_displayed_time))) > max_variance) - m_last_frame_displayed_time = current_ts + sleep_period; - else - m_last_frame_displayed_time += sleep_period; - - Common::Timer::SleepUntil(m_last_frame_displayed_time, false); -} - -bool HostDisplay::GetHostRefreshRate(float* refresh_rate) -{ - if (m_window_info.surface_refresh_rate > 0.0f) - { - *refresh_rate = m_window_info.surface_refresh_rate; - return true; - } - - return WindowInfo::QueryRefreshRateForWindow(m_window_info, refresh_rate); -} - -bool HostDisplay::SetGPUTimingEnabled(bool enabled) -{ - return false; -} - -float HostDisplay::GetAndResetAccumulatedGPUTime() -{ - return 0.0f; -} - -void HostDisplay::SetSoftwareCursor(std::unique_ptr texture, float scale /*= 1.0f*/) -{ - m_cursor_texture = std::move(texture); - m_cursor_texture_scale = scale; -} - -bool HostDisplay::SetSoftwareCursor(const void* pixels, u32 width, u32 height, u32 stride, float scale /*= 1.0f*/) -{ - std::unique_ptr tex = - CreateTexture(width, height, 1, 1, 1, 
GPUTexture::Format::RGBA8, pixels, stride, false); - if (!tex) - return false; - - SetSoftwareCursor(std::move(tex), scale); - return true; -} - -bool HostDisplay::SetSoftwareCursor(const char* path, float scale /*= 1.0f*/) -{ - auto fp = FileSystem::OpenManagedCFile(path, "rb"); - if (!fp) - { - return false; - } - - int width, height, file_channels; - u8* pixel_data = stbi_load_from_file(fp.get(), &width, &height, &file_channels, 4); - if (!pixel_data) - { - const char* error_reason = stbi_failure_reason(); - Log_ErrorPrintf("Failed to load image from '%s': %s", path, error_reason ? error_reason : "unknown error"); - return false; - } - - std::unique_ptr tex = - CreateTexture(static_cast(width), static_cast(height), 1, 1, 1, GPUTexture::Format::RGBA8, pixel_data, - sizeof(u32) * static_cast(width), false); - stbi_image_free(pixel_data); - if (!tex) - return false; - - Log_InfoPrintf("Loaded %dx%d image from '%s' for software cursor", width, height, path); - SetSoftwareCursor(std::move(tex), scale); - return true; -} - -void HostDisplay::ClearSoftwareCursor() -{ - m_cursor_texture.reset(); - m_cursor_texture_scale = 1.0f; -} - -bool HostDisplay::IsUsingLinearFiltering() const -{ - return g_settings.display_linear_filtering; -} - -void HostDisplay::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, - float* out_width, float* out_height, float* out_left_padding, - float* out_top_padding, float* out_scale, float* out_x_scale, - bool apply_aspect_ratio /* = true */) const -{ - const float window_ratio = static_cast(window_width) / static_cast(window_height); - const float display_aspect_ratio = g_settings.display_stretch ? window_ratio : m_display_aspect_ratio; - const float x_scale = - apply_aspect_ratio ? - (display_aspect_ratio / (static_cast(m_display_width) / static_cast(m_display_height))) : - 1.0f; - const float display_width = g_settings.display_stretch_vertically ? 
static_cast(m_display_width) : - static_cast(m_display_width) * x_scale; - const float display_height = g_settings.display_stretch_vertically ? static_cast(m_display_height) / x_scale : - static_cast(m_display_height); - const float active_left = g_settings.display_stretch_vertically ? static_cast(m_display_active_left) : - static_cast(m_display_active_left) * x_scale; - const float active_top = g_settings.display_stretch_vertically ? static_cast(m_display_active_top) / x_scale : - static_cast(m_display_active_top); - const float active_width = g_settings.display_stretch_vertically ? - static_cast(m_display_active_width) : - static_cast(m_display_active_width) * x_scale; - const float active_height = g_settings.display_stretch_vertically ? - static_cast(m_display_active_height) / x_scale : - static_cast(m_display_active_height); - if (out_x_scale) - *out_x_scale = x_scale; - - // now fit it within the window - float scale; - if ((display_width / display_height) >= window_ratio) - { - // align in middle vertically - scale = static_cast(window_width) / display_width; - if (g_settings.display_integer_scaling) - scale = std::max(std::floor(scale), 1.0f); - - if (out_left_padding) - { - if (g_settings.display_integer_scaling) - *out_left_padding = std::max((static_cast(window_width) - display_width * scale) / 2.0f, 0.0f); - else - *out_left_padding = 0.0f; - } - if (out_top_padding) - { - switch (g_settings.display_alignment) - { - case DisplayAlignment::RightOrBottom: - *out_top_padding = std::max(static_cast(window_height) - (display_height * scale), 0.0f); - break; - - case DisplayAlignment::Center: - *out_top_padding = - std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); - break; - - case DisplayAlignment::LeftOrTop: - default: - *out_top_padding = 0.0f; - break; - } - } - } - else - { - // align in middle horizontally - scale = static_cast(window_height) / display_height; - if (g_settings.display_integer_scaling) - scale = 
std::max(std::floor(scale), 1.0f); - - if (out_left_padding) - { - switch (g_settings.display_alignment) - { - case DisplayAlignment::RightOrBottom: - *out_left_padding = std::max(static_cast(window_width) - (display_width * scale), 0.0f); - break; - - case DisplayAlignment::Center: - *out_left_padding = - std::max((static_cast(window_width) - (display_width * scale)) / 2.0f, 0.0f); - break; - - case DisplayAlignment::LeftOrTop: - default: - *out_left_padding = 0.0f; - break; - } - } - - if (out_top_padding) - { - if (g_settings.display_integer_scaling) - *out_top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); - else - *out_top_padding = 0.0f; - } - } - - *out_width = active_width * scale; - *out_height = active_height * scale; - *out_left = active_left * scale; - *out_top = active_top * scale; - if (out_scale) - *out_scale = scale; -} - -std::tuple HostDisplay::CalculateDrawRect(s32 window_width, s32 window_height, - bool apply_aspect_ratio /* = true */) const -{ - float left, top, width, height, left_padding, top_padding; - CalculateDrawRect(window_width, window_height, &left, &top, &width, &height, &left_padding, &top_padding, nullptr, - nullptr, apply_aspect_ratio); - - return std::make_tuple(static_cast(left + left_padding), static_cast(top + top_padding), - static_cast(width), static_cast(height)); -} - -std::tuple HostDisplay::CalculateSoftwareCursorDrawRect() const -{ - return CalculateSoftwareCursorDrawRect(m_mouse_position_x, m_mouse_position_y); -} - -std::tuple HostDisplay::CalculateSoftwareCursorDrawRect(s32 cursor_x, s32 cursor_y) const -{ - const float scale = m_window_info.surface_scale * m_cursor_texture_scale; - const u32 cursor_extents_x = static_cast(static_cast(m_cursor_texture->GetWidth()) * scale * 0.5f); - const u32 cursor_extents_y = static_cast(static_cast(m_cursor_texture->GetHeight()) * scale * 0.5f); - - const s32 out_left = cursor_x - cursor_extents_x; - const s32 out_top = cursor_y - 
cursor_extents_y; - const s32 out_width = cursor_extents_x * 2u; - const s32 out_height = cursor_extents_y * 2u; - - return std::tie(out_left, out_top, out_width, out_height); -} - -std::tuple HostDisplay::ConvertWindowCoordinatesToDisplayCoordinates(s32 window_x, s32 window_y, - s32 window_width, - s32 window_height) const -{ - float left, top, width, height, left_padding, top_padding; - float scale, x_scale; - CalculateDrawRect(window_width, window_height, &left, &top, &width, &height, &left_padding, &top_padding, &scale, - &x_scale); - - // convert coordinates to active display region, then to full display region - const float scaled_display_x = static_cast(window_x) - left_padding; - const float scaled_display_y = static_cast(window_y) - top_padding; - - // scale back to internal resolution - const float display_x = scaled_display_x / scale / x_scale; - const float display_y = scaled_display_y / scale; - - return std::make_tuple(display_x, display_y); -} - -static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, - bool clear_alpha, bool flip_y, u32 resize_width, u32 resize_height, - std::vector texture_data, u32 texture_data_stride, - GPUTexture::Format texture_format) -{ - - const char* extension = std::strrchr(filename.c_str(), '.'); - if (!extension) - { - Log_ErrorPrintf("Unable to determine file extension for '%s'", filename.c_str()); - return false; - } - - if (!GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) - return false; - - if (clear_alpha) - { - for (u32& pixel : texture_data) - pixel |= 0xFF000000; - } - - if (flip_y) - GPUTexture::FlipTextureDataRGBA8(width, height, texture_data, texture_data_stride); - - if (resize_width > 0 && resize_height > 0 && (resize_width != width || resize_height != height)) - { - std::vector resized_texture_data(resize_width * resize_height); - u32 resized_texture_stride = sizeof(u32) * resize_width; - 
if (!stbir_resize_uint8(reinterpret_cast(texture_data.data()), width, height, texture_data_stride, - reinterpret_cast(resized_texture_data.data()), resize_width, resize_height, - resized_texture_stride, 4)) - { - Log_ErrorPrintf("Failed to resize texture data from %ux%u to %ux%u", width, height, resize_width, resize_height); - return false; - } - - width = resize_width; - height = resize_height; - texture_data = std::move(resized_texture_data); - texture_data_stride = resized_texture_stride; - } - - const auto write_func = [](void* context, void* data, int size) { - std::fwrite(data, 1, size, static_cast(context)); - }; - - bool result = false; - if (StringUtil::Strcasecmp(extension, ".png") == 0) - { - result = - (stbi_write_png_to_func(write_func, fp.get(), width, height, 4, texture_data.data(), texture_data_stride) != 0); - } - else if (StringUtil::Strcasecmp(extension, ".jpg") == 0) - { - result = (stbi_write_jpg_to_func(write_func, fp.get(), width, height, 4, texture_data.data(), 95) != 0); - } - else if (StringUtil::Strcasecmp(extension, ".tga") == 0) - { - result = (stbi_write_tga_to_func(write_func, fp.get(), width, height, 4, texture_data.data()) != 0); - } - else if (StringUtil::Strcasecmp(extension, ".bmp") == 0) - { - result = (stbi_write_bmp_to_func(write_func, fp.get(), width, height, 4, texture_data.data()) != 0); - } - - if (!result) - { - Log_ErrorPrintf("Unknown extension in filename '%s' or save error: '%s'", filename.c_str(), extension); - return false; - } - - return true; -} - -bool HostDisplay::WriteTextureToFile(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, std::string filename, - bool clear_alpha /* = true */, bool flip_y /* = false */, - u32 resize_width /* = 0 */, u32 resize_height /* = 0 */, - bool compress_on_thread /* = false */) -{ - std::vector texture_data(width * height); - u32 texture_data_stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(texture->GetFormat()) * width, 4); - if (!DownloadTexture(texture, x, y, 
width, height, texture_data.data(), texture_data_stride)) - { - Log_ErrorPrintf("Texture download failed"); - return false; - } - - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); - if (!fp) - { - Log_ErrorPrintf("Can't open file '%s': errno %d", filename.c_str(), errno); - return false; - } - - if (!compress_on_thread) - { - return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), clear_alpha, flip_y, - resize_width, resize_height, std::move(texture_data), texture_data_stride, - texture->GetFormat()); - } - - std::thread compress_thread(CompressAndWriteTextureToFile, width, height, std::move(filename), std::move(fp), - clear_alpha, flip_y, resize_width, resize_height, std::move(texture_data), - texture_data_stride, texture->GetFormat()); - compress_thread.detach(); - return true; -} - -bool HostDisplay::WriteDisplayTextureToFile(std::string filename, bool full_resolution /* = true */, - bool apply_aspect_ratio /* = true */, bool compress_on_thread /* = false */) -{ - if (!m_display_texture) - return false; - - s32 resize_width = 0; - s32 resize_height = std::abs(m_display_texture_view_height); - if (apply_aspect_ratio) - { - const float ss_width_scale = static_cast(m_display_active_width) / static_cast(m_display_width); - const float ss_height_scale = static_cast(m_display_active_height) / static_cast(m_display_height); - const float ss_aspect_ratio = m_display_aspect_ratio * ss_width_scale / ss_height_scale; - resize_width = g_settings.display_stretch_vertically ? - m_display_texture_view_width : - static_cast(static_cast(resize_height) * ss_aspect_ratio); - resize_height = g_settings.display_stretch_vertically ? 
- static_cast(static_cast(resize_height) / - (m_display_aspect_ratio / - (static_cast(m_display_width) / static_cast(m_display_height)))) : - resize_height; - } - else - { - resize_width = m_display_texture_view_width; - } - - if (!full_resolution) - { - const s32 resolution_scale = std::abs(m_display_texture_view_height) / m_display_active_height; - resize_height /= resolution_scale; - resize_width /= resolution_scale; - } - - if (resize_width <= 0 || resize_height <= 0) - return false; - - const bool flip_y = (m_display_texture_view_height < 0); - s32 read_height = m_display_texture_view_height; - s32 read_y = m_display_texture_view_y; - if (flip_y) - { - read_height = -m_display_texture_view_height; - read_y = - (m_display_texture->GetHeight() - read_height) - (m_display_texture->GetHeight() - m_display_texture_view_y); - } - - return WriteTextureToFile(m_display_texture, m_display_texture_view_x, read_y, m_display_texture_view_width, - read_height, std::move(filename), true, flip_y, static_cast(resize_width), - static_cast(resize_height), compress_on_thread); -} - -bool HostDisplay::WriteDisplayTextureToBuffer(std::vector* buffer, u32 resize_width /* = 0 */, - u32 resize_height /* = 0 */, bool clear_alpha /* = true */) -{ - if (!m_display_texture) - return false; - - const bool flip_y = (m_display_texture_view_height < 0); - s32 read_width = m_display_texture_view_width; - s32 read_height = m_display_texture_view_height; - s32 read_x = m_display_texture_view_x; - s32 read_y = m_display_texture_view_y; - if (flip_y) - { - read_height = -m_display_texture_view_height; - read_y = - (m_display_texture->GetHeight() - read_height) - (m_display_texture->GetHeight() - m_display_texture_view_y); - } - - u32 width = static_cast(read_width); - u32 height = static_cast(read_height); - std::vector texture_data(width * height); - u32 texture_data_stride = Common::AlignUpPow2(m_display_texture->GetPixelSize() * width, 4); - if (!DownloadTexture(m_display_texture, read_x, 
read_y, width, height, texture_data.data(), texture_data_stride)) - { - Log_ErrorPrintf("Failed to download texture from GPU."); - return false; - } - - if (!GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, - m_display_texture->GetFormat())) - { - return false; - } - - if (clear_alpha) - { - for (u32& pixel : texture_data) - pixel |= 0xFF000000; - } - - if (flip_y) - { - std::vector temp(width); - for (u32 flip_row = 0; flip_row < (height / 2); flip_row++) - { - u32* top_ptr = &texture_data[flip_row * width]; - u32* bottom_ptr = &texture_data[((height - 1) - flip_row) * width]; - std::memcpy(temp.data(), top_ptr, texture_data_stride); - std::memcpy(top_ptr, bottom_ptr, texture_data_stride); - std::memcpy(bottom_ptr, temp.data(), texture_data_stride); - } - } - - if (resize_width > 0 && resize_height > 0 && (resize_width != width || resize_height != height)) - { - std::vector resized_texture_data(resize_width * resize_height); - u32 resized_texture_stride = sizeof(u32) * resize_width; - if (!stbir_resize_uint8(reinterpret_cast(texture_data.data()), width, height, texture_data_stride, - reinterpret_cast(resized_texture_data.data()), resize_width, resize_height, - resized_texture_stride, 4)) - { - Log_ErrorPrintf("Failed to resize texture data from %ux%u to %ux%u", width, height, resize_width, resize_height); - return false; - } - - width = resize_width; - height = resize_height; - *buffer = std::move(resized_texture_data); - texture_data_stride = resized_texture_stride; - } - else - { - *buffer = texture_data; - } - - return true; -} - -bool HostDisplay::WriteScreenshotToFile(std::string filename, bool internal_resolution /* = false */, - bool compress_on_thread /* = false */) -{ - u32 width = m_window_info.surface_width; - u32 height = m_window_info.surface_height; - auto [draw_left, draw_top, draw_width, draw_height] = CalculateDrawRect(width, height); - - if (internal_resolution && m_display_texture_view_width != 0 && 
m_display_texture_view_height != 0) - { - // If internal res, scale the computed draw rectangle to the internal res. - // We re-use the draw rect because it's already been AR corrected. - const float sar = - static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); - const float dar = static_cast(draw_width) / static_cast(draw_height); - if (sar >= dar) - { - // stretch height, preserve width - const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); - width = m_display_texture_view_width; - height = static_cast(std::round(static_cast(draw_height) * scale)); - } - else - { - // stretch width, preserve height - const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); - width = static_cast(std::round(static_cast(draw_width) * scale)); - height = m_display_texture_view_height; - } - - // DX11 won't go past 16K texture size. - constexpr u32 MAX_TEXTURE_SIZE = 16384; - if (width > MAX_TEXTURE_SIZE) - { - height = static_cast(static_cast(height) / - (static_cast(width) / static_cast(MAX_TEXTURE_SIZE))); - width = MAX_TEXTURE_SIZE; - } - if (height > MAX_TEXTURE_SIZE) - { - height = MAX_TEXTURE_SIZE; - width = static_cast(static_cast(width) / - (static_cast(height) / static_cast(MAX_TEXTURE_SIZE))); - } - - // Remove padding, it's not part of the framebuffer. 
- draw_left = 0; - draw_top = 0; - draw_width = static_cast(width); - draw_height = static_cast(height); - } - if (width == 0 || height == 0) - return false; - - std::vector pixels; - u32 pixels_stride; - GPUTexture::Format pixels_format; - if (!RenderScreenshot(width, height, - Common::Rectangle::FromExtents(draw_left, draw_top, draw_width, draw_height), &pixels, - &pixels_stride, &pixels_format)) - { - Log_ErrorPrintf("Failed to render %ux%u screenshot", width, height); - return false; - } - - auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); - if (!fp) - { - Log_ErrorPrintf("Can't open file '%s': errno %d", filename.c_str(), errno); - return false; - } - - if (!compress_on_thread) - { - return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), true, UsesLowerLeftOrigin(), - width, height, std::move(pixels), pixels_stride, pixels_format); - } - - std::thread compress_thread(CompressAndWriteTextureToFile, width, height, std::move(filename), std::move(fp), true, - UsesLowerLeftOrigin(), width, height, std::move(pixels), pixels_stride, pixels_format); - compress_thread.detach(); - return true; -} diff --git a/src/util/host_display.h b/src/util/host_display.h deleted file mode 100644 index 80d4ab728..000000000 --- a/src/util/host_display.h +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once - -#include "common/gpu_texture.h" -#include "common/rectangle.h" -#include "common/window_info.h" - -#include -#include -#include -#include -#include - -enum class RenderAPI : u32 -{ - None, - D3D11, - D3D12, - Vulkan, - OpenGL, - OpenGLES -}; - -// Interface to the frontend's renderer. -class HostDisplay -{ -public: - struct AdapterAndModeList - { - std::vector adapter_names; - std::vector fullscreen_modes; - }; - - virtual ~HostDisplay(); - - /// Returns the default/preferred API for the system. 
- static RenderAPI GetPreferredAPI(); - - /// Parses a fullscreen mode into its components (width * height @ refresh hz) - static bool ParseFullscreenMode(const std::string_view& mode, u32* width, u32* height, float* refresh_rate); - - /// Converts a fullscreen mode to a string. - static std::string GetFullscreenModeString(u32 width, u32 height, float refresh_rate); - - ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; } - ALWAYS_INLINE s32 GetWindowWidth() const { return static_cast(m_window_info.surface_width); } - ALWAYS_INLINE s32 GetWindowHeight() const { return static_cast(m_window_info.surface_height); } - ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; } - - // Position is relative to the top-left corner of the window. - ALWAYS_INLINE s32 GetMousePositionX() const { return m_mouse_position_x; } - ALWAYS_INLINE s32 GetMousePositionY() const { return m_mouse_position_y; } - ALWAYS_INLINE void SetMousePosition(s32 x, s32 y) - { - m_mouse_position_x = x; - m_mouse_position_y = y; - } - - ALWAYS_INLINE const void* GetDisplayTextureHandle() const { return m_display_texture; } - ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; } - ALWAYS_INLINE s32 GetDisplayHeight() const { return m_display_height; } - ALWAYS_INLINE float GetDisplayAspectRatio() const { return m_display_aspect_ratio; } - ALWAYS_INLINE bool IsGPUTimingEnabled() const { return m_gpu_timing_enabled; } - - virtual RenderAPI GetRenderAPI() const = 0; - virtual void* GetDevice() const = 0; - virtual void* GetContext() const = 0; - - virtual bool HasDevice() const = 0; - virtual bool HasSurface() const = 0; - - virtual bool CreateDevice(const WindowInfo& wi, bool vsync) = 0; - virtual bool SetupDevice() = 0; - virtual bool MakeCurrent() = 0; - virtual bool DoneCurrent() = 0; - virtual void DestroySurface() = 0; - virtual bool ChangeWindow(const WindowInfo& wi) = 0; - virtual bool SupportsFullscreen() const = 0; - virtual 
bool IsFullscreen() = 0; - virtual bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) = 0; - virtual AdapterAndModeList GetAdapterAndModeList() = 0; - virtual bool CreateResources() = 0; - virtual void DestroyResources(); - - virtual bool SetPostProcessingChain(const std::string_view& config) = 0; - - /// Call when the window size changes externally to recreate any resources. - virtual void ResizeWindow(s32 new_window_width, s32 new_window_height) = 0; - - /// Creates an abstracted RGBA8 texture. If dynamic, the texture can be updated with UpdateTexture() below. - virtual std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, u32 data_stride, - bool dynamic = false) = 0; - virtual bool BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) = 0; - virtual void EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) = 0; - - virtual bool UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch); - - virtual bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) = 0; - - /// Returns false if the window was completely occluded. - virtual bool Render(bool skip_present) = 0; - - /// Renders the display with postprocessing to the specified image. - virtual bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) = 0; - - ALWAYS_INLINE bool IsVsyncEnabled() const { return m_vsync_enabled; } - virtual void SetVSync(bool enabled) = 0; - - /// ImGui context management, usually called by derived classes. 
- virtual bool CreateImGuiContext() = 0; - virtual void DestroyImGuiContext() = 0; - virtual bool UpdateImGuiFontTexture() = 0; - - bool UsesLowerLeftOrigin() const; - void SetDisplayMaxFPS(float max_fps); - bool ShouldSkipDisplayingFrame(); - void ThrottlePresentation(); - - void ClearDisplayTexture() - { - m_display_texture = nullptr; - m_display_texture_view_x = 0; - m_display_texture_view_y = 0; - m_display_texture_view_width = 0; - m_display_texture_view_height = 0; - m_display_changed = true; - } - - void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height) - { - m_display_texture = texture; - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; - m_display_changed = true; - } - - void SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height) - { - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; - m_display_changed = true; - } - - void SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, s32 active_width, - s32 active_height, float display_aspect_ratio) - { - m_display_width = display_width; - m_display_height = display_height; - m_display_active_left = active_left; - m_display_active_top = active_top; - m_display_active_width = active_width; - m_display_active_height = active_height; - m_display_aspect_ratio = display_aspect_ratio; - m_display_changed = true; - } - - virtual bool SupportsTextureFormat(GPUTexture::Format format) const = 0; - - virtual bool GetHostRefreshRate(float* refresh_rate); - - /// Enables/disables GPU frame timing. - virtual bool SetGPUTimingEnabled(bool enabled); - - /// Returns the amount of GPU time utilized since the last time this method was called. 
- virtual float GetAndResetAccumulatedGPUTime(); - - /// Sets the software cursor to the specified texture. Ownership of the texture is transferred. - void SetSoftwareCursor(std::unique_ptr texture, float scale = 1.0f); - - /// Sets the software cursor to the specified image. - bool SetSoftwareCursor(const void* pixels, u32 width, u32 height, u32 stride, float scale = 1.0f); - - /// Sets the software cursor to the specified path (png image). - bool SetSoftwareCursor(const char* path, float scale = 1.0f); - - /// Disables the software cursor. - void ClearSoftwareCursor(); - - /// Helper function for computing the draw rectangle in a larger window. - std::tuple CalculateDrawRect(s32 window_width, s32 window_height, - bool apply_aspect_ratio = true) const; - - /// Helper function for converting window coordinates to display coordinates. - std::tuple ConvertWindowCoordinatesToDisplayCoordinates(s32 window_x, s32 window_y, s32 window_width, - s32 window_height) const; - - /// Helper function to save texture data to a PNG. If flip_y is set, the image will be flipped aka OpenGL. - bool WriteTextureToFile(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, std::string filename, - bool clear_alpha = true, bool flip_y = false, u32 resize_width = 0, u32 resize_height = 0, - bool compress_on_thread = false); - - /// Helper function to save current display texture to PNG. - bool WriteDisplayTextureToFile(std::string filename, bool full_resolution = true, bool apply_aspect_ratio = true, - bool compress_on_thread = false); - - /// Helper function to save current display texture to a buffer. - bool WriteDisplayTextureToBuffer(std::vector* buffer, u32 resize_width = 0, u32 resize_height = 0, - bool clear_alpha = true); - - /// Helper function to save screenshot to PNG. 
- bool WriteScreenshotToFile(std::string filename, bool internal_resolution = false, bool compress_on_thread = false); - -protected: - ALWAYS_INLINE bool HasSoftwareCursor() const { return static_cast(m_cursor_texture); } - ALWAYS_INLINE bool HasDisplayTexture() const { return (m_display_texture != nullptr); } - - bool IsUsingLinearFiltering() const; - - void CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, float* out_width, - float* out_height, float* out_left_padding, float* out_top_padding, float* out_scale, - float* out_x_scale, bool apply_aspect_ratio = true) const; - - std::tuple CalculateSoftwareCursorDrawRect() const; - std::tuple CalculateSoftwareCursorDrawRect(s32 cursor_x, s32 cursor_y) const; - - WindowInfo m_window_info; - - u64 m_last_frame_displayed_time = 0; - - s32 m_mouse_position_x = 0; - s32 m_mouse_position_y = 0; - - s32 m_display_width = 0; - s32 m_display_height = 0; - s32 m_display_active_left = 0; - s32 m_display_active_top = 0; - s32 m_display_active_width = 0; - s32 m_display_active_height = 0; - float m_display_aspect_ratio = 1.0f; - float m_display_frame_interval = 0.0f; - - GPUTexture* m_display_texture = nullptr; - s32 m_display_texture_view_x = 0; - s32 m_display_texture_view_y = 0; - s32 m_display_texture_view_width = 0; - s32 m_display_texture_view_height = 0; - - std::unique_ptr m_cursor_texture; - float m_cursor_texture_scale = 1.0f; - - bool m_display_changed = false; - bool m_gpu_timing_enabled = false; - bool m_vsync_enabled = false; -}; - -/// Returns a pointer to the current host display abstraction. Assumes AcquireHostDisplay() has been called. -extern std::unique_ptr g_host_display; - -namespace Host { -std::unique_ptr CreateDisplayForAPI(RenderAPI api); - -/// Creates the host display. This may create a new window. The API used depends on the current configuration. -bool AcquireHostDisplay(RenderAPI api); - -/// Destroys the host display. This may close the display window. 
-void ReleaseHostDisplay(); - -/// Returns false if the window was completely occluded. If frame_skip is set, the frame won't be -/// displayed, but the GPU command queue will still be flushed. -// bool BeginPresentFrame(bool frame_skip); - -/// Presents the frame to the display, and renders OSD elements. -// void EndPresentFrame(); - -/// Provided by the host; renders the display. -void RenderDisplay(bool skip_present); -void InvalidateDisplay(); -} // namespace Host diff --git a/src/util/imgui_fullscreen.cpp b/src/util/imgui_fullscreen.cpp index 1804916c4..3960d9685 100644 --- a/src/util/imgui_fullscreen.cpp +++ b/src/util/imgui_fullscreen.cpp @@ -15,8 +15,8 @@ #include "common/string_util.h" #include "common/threading.h" #include "common/timer.h" +#include "gpu_device.h" #include "core/host.h" -#include "host_display.h" #include "fmt/core.h" #include "imgui_internal.h" #include "imgui_stdlib.h" @@ -267,8 +267,9 @@ std::optional ImGuiFullscreen::LoadTextureImage(const char* std::shared_ptr ImGuiFullscreen::UploadTexture(const char* path, const Common::RGBA8Image& image) { - std::unique_ptr texture = g_host_display->CreateTexture( - image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); + std::unique_ptr texture = + g_gpu_device->CreateTexture(image.GetWidth(), image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, image.GetPixels(), image.GetPitch()); if (!texture) { Log_ErrorPrintf("failed to create %ux%u texture for resource", image.GetWidth(), image.GetHeight()); diff --git a/src/util/imgui_impl_dx11.cpp b/src/util/imgui_impl_dx11.cpp deleted file mode 100644 index 3d8b0f7a6..000000000 --- a/src/util/imgui_impl_dx11.cpp +++ /dev/null @@ -1,499 +0,0 @@ -// dear imgui: Renderer Backend for DirectX11 -// This needs to be used along with a Platform Backend (e.g. Win32) - -// Implemented features: -// [X] Renderer: User texture binding. 
Use 'ID3D11ShaderResourceView*' as ImTextureID. Read the FAQ about ImTextureID! -// [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices. - -// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. -// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. -// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. -// Read online: https://github.com/ocornut/imgui/tree/master/docs - -// CHANGELOG -// (minor and older changes stripped away, please see git history for details) -// 2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX). -// 2021-05-19: DirectX11: Replaced direct access to ImDrawCmd::TextureId with a call to ImDrawCmd::GetTexID(). (will become a requirement) -// 2021-02-18: DirectX11: Change blending equation to preserve alpha in output buffer. -// 2019-08-01: DirectX11: Fixed code querying the Geometry Shader state (would generally error with Debug layer enabled). -// 2019-07-21: DirectX11: Backup, clear and restore Geometry Shader is any is bound when calling ImGui_ImplDX10_RenderDrawData. Clearing Hull/Domain/Compute shaders without backup/restore. -// 2019-05-29: DirectX11: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. -// 2019-04-30: DirectX11: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. -// 2018-12-03: Misc: Added #pragma comment statement to automatically link with d3dcompiler.lib when using D3DCompile(). -// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. -// 2018-08-01: DirectX11: Querying for IDXGIFactory instead of IDXGIFactory1 to increase compatibility. 
-// 2018-07-13: DirectX11: Fixed unreleased resources in Init and Shutdown functions. -// 2018-06-08: Misc: Extracted imgui_impl_dx11.cpp/.h away from the old combined DX11+Win32 example. -// 2018-06-08: DirectX11: Use draw_data->DisplayPos and draw_data->DisplaySize to setup projection matrix and clipping rectangle. -// 2018-02-16: Misc: Obsoleted the io.RenderDrawListsFn callback and exposed ImGui_ImplDX11_RenderDrawData() in the .h file so you can call it yourself. -// 2018-02-06: Misc: Removed call to ImGui::Shutdown() which is not available from 1.60 WIP, user needs to call CreateContext/DestroyContext themselves. -// 2016-05-07: DirectX11: Disabling depth-write. - -#include "imgui.h" -#include "imgui_impl_dx11.h" -#include "common/d3d11/texture.h" - -// DirectX -#include -#include -#include -#ifdef _MSC_VER -#pragma comment(lib, "d3dcompiler") // Automatically link with d3dcompiler.lib as we are using D3DCompile() below. -#endif - -// DirectX11 data -struct ImGui_ImplDX11_Data -{ - ID3D11Device* pd3dDevice; - ID3D11DeviceContext* pd3dDeviceContext; - IDXGIFactory* pFactory; - ID3D11Buffer* pVB; - ID3D11Buffer* pIB; - ID3D11VertexShader* pVertexShader; - ID3D11InputLayout* pInputLayout; - ID3D11Buffer* pVertexConstantBuffer; - ID3D11PixelShader* pPixelShader; - ID3D11SamplerState* pFontSampler; - ID3D11RasterizerState* pRasterizerState; - ID3D11BlendState* pBlendState; - ID3D11DepthStencilState* pDepthStencilState; - int VertexBufferSize; - int IndexBufferSize; - D3D11::Texture FontTexture; - - ImGui_ImplDX11_Data() { memset((void*)this, 0, sizeof(*this)); VertexBufferSize = 5000; IndexBufferSize = 10000; } -}; - -struct VERTEX_CONSTANT_BUFFER -{ - float mvp[4][4]; -}; - -// Backend data stored in io.BackendRendererUserData to allow support for multiple Dear ImGui contexts -// It is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead of multiple Dear ImGui contexts. 
-static ImGui_ImplDX11_Data* ImGui_ImplDX11_GetBackendData() -{ - return ImGui::GetCurrentContext() ? (ImGui_ImplDX11_Data*)ImGui::GetIO().BackendRendererUserData : NULL; -} - -// Functions -static void ImGui_ImplDX11_SetupRenderState(ImDrawData* draw_data, ID3D11DeviceContext* ctx) -{ - ImGui_ImplDX11_Data* bd = ImGui_ImplDX11_GetBackendData(); - - // Setup viewport - D3D11_VIEWPORT vp; - memset(&vp, 0, sizeof(D3D11_VIEWPORT)); - vp.Width = draw_data->DisplaySize.x; - vp.Height = draw_data->DisplaySize.y; - vp.MinDepth = 0.0f; - vp.MaxDepth = 1.0f; - vp.TopLeftX = vp.TopLeftY = 0; - ctx->RSSetViewports(1, &vp); - - // Setup shader and vertex buffers - unsigned int stride = sizeof(ImDrawVert); - unsigned int offset = 0; - ctx->IASetInputLayout(bd->pInputLayout); - ctx->IASetVertexBuffers(0, 1, &bd->pVB, &stride, &offset); - ctx->IASetIndexBuffer(bd->pIB, sizeof(ImDrawIdx) == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT, 0); - ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - ctx->VSSetShader(bd->pVertexShader, NULL, 0); - ctx->VSSetConstantBuffers(0, 1, &bd->pVertexConstantBuffer); - ctx->PSSetShader(bd->pPixelShader, NULL, 0); - ctx->PSSetSamplers(0, 1, &bd->pFontSampler); - ctx->GSSetShader(NULL, NULL, 0); - ctx->HSSetShader(NULL, NULL, 0); // In theory we should backup and restore this as well.. very infrequently used.. - ctx->DSSetShader(NULL, NULL, 0); // In theory we should backup and restore this as well.. very infrequently used.. - ctx->CSSetShader(NULL, NULL, 0); // In theory we should backup and restore this as well.. very infrequently used.. 
- - // Setup blend state - const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; - ctx->OMSetBlendState(bd->pBlendState, blend_factor, 0xffffffff); - ctx->OMSetDepthStencilState(bd->pDepthStencilState, 0); - ctx->RSSetState(bd->pRasterizerState); -} - -// Render function -void ImGui_ImplDX11_RenderDrawData(ImDrawData* draw_data) -{ - // Avoid rendering when minimized - if (draw_data->DisplaySize.x <= 0.0f || draw_data->DisplaySize.y <= 0.0f) - return; - - ImGui_ImplDX11_Data* bd = ImGui_ImplDX11_GetBackendData(); - ID3D11DeviceContext* ctx = bd->pd3dDeviceContext; - - // Create and grow vertex/index buffers if needed - if (!bd->pVB || bd->VertexBufferSize < draw_data->TotalVtxCount) - { - if (bd->pVB) { bd->pVB->Release(); bd->pVB = NULL; } - bd->VertexBufferSize = draw_data->TotalVtxCount + 5000; - D3D11_BUFFER_DESC desc; - memset(&desc, 0, sizeof(D3D11_BUFFER_DESC)); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.ByteWidth = bd->VertexBufferSize * sizeof(ImDrawVert); - desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = 0; - if (bd->pd3dDevice->CreateBuffer(&desc, NULL, &bd->pVB) < 0) - return; - } - if (!bd->pIB || bd->IndexBufferSize < draw_data->TotalIdxCount) - { - if (bd->pIB) { bd->pIB->Release(); bd->pIB = NULL; } - bd->IndexBufferSize = draw_data->TotalIdxCount + 10000; - D3D11_BUFFER_DESC desc; - memset(&desc, 0, sizeof(D3D11_BUFFER_DESC)); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.ByteWidth = bd->IndexBufferSize * sizeof(ImDrawIdx); - desc.BindFlags = D3D11_BIND_INDEX_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - if (bd->pd3dDevice->CreateBuffer(&desc, NULL, &bd->pIB) < 0) - return; - } - - // Upload vertex/index data into a single contiguous GPU buffer - D3D11_MAPPED_SUBRESOURCE vtx_resource, idx_resource; - if (ctx->Map(bd->pVB, 0, D3D11_MAP_WRITE_DISCARD, 0, &vtx_resource) != S_OK) - return; - if (ctx->Map(bd->pIB, 0, D3D11_MAP_WRITE_DISCARD, 0, &idx_resource) != S_OK) - return; - 
ImDrawVert* vtx_dst = (ImDrawVert*)vtx_resource.pData; - ImDrawIdx* idx_dst = (ImDrawIdx*)idx_resource.pData; - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); - memcpy(idx_dst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); - vtx_dst += cmd_list->VtxBuffer.Size; - idx_dst += cmd_list->IdxBuffer.Size; - } - ctx->Unmap(bd->pVB, 0); - ctx->Unmap(bd->pIB, 0); - - // Setup orthographic projection matrix into our constant buffer - // Our visible imgui space lies from draw_data->DisplayPos (top left) to draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps. - { - D3D11_MAPPED_SUBRESOURCE mapped_resource; - if (ctx->Map(bd->pVertexConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_resource) != S_OK) - return; - VERTEX_CONSTANT_BUFFER* constant_buffer = (VERTEX_CONSTANT_BUFFER*)mapped_resource.pData; - float L = draw_data->DisplayPos.x; - float R = draw_data->DisplayPos.x + draw_data->DisplaySize.x; - float T = draw_data->DisplayPos.y; - float B = draw_data->DisplayPos.y + draw_data->DisplaySize.y; - float mvp[4][4] = - { - { 2.0f/(R-L), 0.0f, 0.0f, 0.0f }, - { 0.0f, 2.0f/(T-B), 0.0f, 0.0f }, - { 0.0f, 0.0f, 0.5f, 0.0f }, - { (R+L)/(L-R), (T+B)/(B-T), 0.5f, 1.0f }, - }; - memcpy(&constant_buffer->mvp, mvp, sizeof(mvp)); - ctx->Unmap(bd->pVertexConstantBuffer, 0); - } - - // Setup desired DX state - ImGui_ImplDX11_SetupRenderState(draw_data, ctx); - - // Render command lists - // (Because we merged all buffers into a single one, we maintain our own offset into them) - int global_idx_offset = 0; - int global_vtx_offset = 0; - ImVec2 clip_off = draw_data->DisplayPos; - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) - { 
- const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; - if (pcmd->UserCallback != NULL) - { - // User callback, registered via ImDrawList::AddCallback() - // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) - if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) - ImGui_ImplDX11_SetupRenderState(draw_data, ctx); - else - pcmd->UserCallback(cmd_list, pcmd); - } - else - { - // Project scissor/clipping rectangles into framebuffer space - ImVec2 clip_min(pcmd->ClipRect.x - clip_off.x, pcmd->ClipRect.y - clip_off.y); - ImVec2 clip_max(pcmd->ClipRect.z - clip_off.x, pcmd->ClipRect.w - clip_off.y); - if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y) - continue; - - // Apply scissor/clipping rectangle - const D3D11_RECT r = { (LONG)clip_min.x, (LONG)clip_min.y, (LONG)clip_max.x, (LONG)clip_max.y }; - ctx->RSSetScissorRects(1, &r); - - // Bind texture, Draw - const D3D11::Texture* tex = static_cast(pcmd->GetTexID()); - ID3D11ShaderResourceView* texture_srv = tex ? tex->GetD3DSRV() : nullptr; - ctx->PSSetShaderResources(0, 1, &texture_srv); - ctx->DrawIndexed(pcmd->ElemCount, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset); - } - } - global_idx_offset += cmd_list->IdxBuffer.Size; - global_vtx_offset += cmd_list->VtxBuffer.Size; - } -} - -bool ImGui_ImplDX11_CreateFontsTexture() -{ - // Build texture atlas - ImGuiIO& io = ImGui::GetIO(); - ImGui_ImplDX11_Data* bd = ImGui_ImplDX11_GetBackendData(); - unsigned char* pixels; - int width, height; - io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); - - const u32 stride = sizeof(u32) * width; - if (!bd->FontTexture.Create(bd->pd3dDevice, width, height, 1, 1, 1, GPUTexture::Format::RGBA8, D3D11_BIND_SHADER_RESOURCE, pixels, stride)) - return false; - - // Store our identifier - io.Fonts->SetTexID((ImTextureID)&bd->FontTexture); - - // Create texture sampler - // (Bilinear sampling is required by default. 
Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling) - if (!bd->pFontSampler) - { - D3D11_SAMPLER_DESC desc; - ZeroMemory(&desc, sizeof(desc)); - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - desc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - desc.MipLODBias = 0.f; - desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; - desc.MinLOD = 0.f; - desc.MaxLOD = 0.f; - bd->pd3dDevice->CreateSamplerState(&desc, &bd->pFontSampler); - } - - return true; -} - -bool ImGui_ImplDX11_CreateDeviceObjects() -{ - ImGui_ImplDX11_Data* bd = ImGui_ImplDX11_GetBackendData(); - if (!bd->pd3dDevice) - return false; - if (bd->pFontSampler) - ImGui_ImplDX11_InvalidateDeviceObjects(); - - // By using D3DCompile() from / d3dcompiler.lib, we introduce a dependency to a given version of d3dcompiler_XX.dll (see D3DCOMPILER_DLL_A) - // If you would like to use this DX11 sample code but remove this dependency you can: - // 1) compile once, save the compiled shader blobs into a file or source code and pass them to CreateVertexShader()/CreatePixelShader() [preferred solution] - // 2) use code to detect any version of the DLL and grab a pointer to D3DCompile from the DLL. - // See https://github.com/ocornut/imgui/pull/638 for sources and details. 
- - // Create the vertex shader - { - static const char* vertexShader = - "cbuffer vertexBuffer : register(b0) \ - {\ - float4x4 ProjectionMatrix; \ - };\ - struct VS_INPUT\ - {\ - float2 pos : POSITION;\ - float4 col : COLOR0;\ - float2 uv : TEXCOORD0;\ - };\ - \ - struct PS_INPUT\ - {\ - float4 pos : SV_POSITION;\ - float4 col : COLOR0;\ - float2 uv : TEXCOORD0;\ - };\ - \ - PS_INPUT main(VS_INPUT input)\ - {\ - PS_INPUT output;\ - output.pos = mul( ProjectionMatrix, float4(input.pos.xy, 0.f, 1.f));\ - output.col = input.col;\ - output.uv = input.uv;\ - return output;\ - }"; - - ID3DBlob* vertexShaderBlob; - if (FAILED(D3DCompile(vertexShader, strlen(vertexShader), NULL, NULL, NULL, "main", "vs_4_0", 0, 0, &vertexShaderBlob, NULL))) - return false; // NB: Pass ID3DBlob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob! - if (bd->pd3dDevice->CreateVertexShader(vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize(), NULL, &bd->pVertexShader) != S_OK) - { - vertexShaderBlob->Release(); - return false; - } - - // Create the input layout - D3D11_INPUT_ELEMENT_DESC local_layout[] = - { - { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, uv), D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)IM_OFFSETOF(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0 }, - }; - if (bd->pd3dDevice->CreateInputLayout(local_layout, 3, vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize(), &bd->pInputLayout) != S_OK) - { - vertexShaderBlob->Release(); - return false; - } - vertexShaderBlob->Release(); - - // Create the constant buffer - { - D3D11_BUFFER_DESC desc; - desc.ByteWidth = sizeof(VERTEX_CONSTANT_BUFFER); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - 
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = 0; - bd->pd3dDevice->CreateBuffer(&desc, NULL, &bd->pVertexConstantBuffer); - } - } - - // Create the pixel shader - { - static const char* pixelShader = - "struct PS_INPUT\ - {\ - float4 pos : SV_POSITION;\ - float4 col : COLOR0;\ - float2 uv : TEXCOORD0;\ - };\ - sampler sampler0;\ - Texture2D texture0;\ - \ - float4 main(PS_INPUT input) : SV_Target\ - {\ - float4 out_col = input.col * texture0.Sample(sampler0, input.uv); \ - return out_col; \ - }"; - - ID3DBlob* pixelShaderBlob; - if (FAILED(D3DCompile(pixelShader, strlen(pixelShader), NULL, NULL, NULL, "main", "ps_4_0", 0, 0, &pixelShaderBlob, NULL))) - return false; // NB: Pass ID3DBlob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob! - if (bd->pd3dDevice->CreatePixelShader(pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize(), NULL, &bd->pPixelShader) != S_OK) - { - pixelShaderBlob->Release(); - return false; - } - pixelShaderBlob->Release(); - } - - // Create the blending setup - { - D3D11_BLEND_DESC desc; - ZeroMemory(&desc, sizeof(desc)); - desc.AlphaToCoverageEnable = false; - desc.RenderTarget[0].BlendEnable = true; - desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; - desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; - desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; - desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - bd->pd3dDevice->CreateBlendState(&desc, &bd->pBlendState); - } - - // Create the rasterizer state - { - D3D11_RASTERIZER_DESC desc; - ZeroMemory(&desc, sizeof(desc)); - desc.FillMode = D3D11_FILL_SOLID; - desc.CullMode = D3D11_CULL_NONE; - desc.ScissorEnable = true; - desc.DepthClipEnable = true; - 
bd->pd3dDevice->CreateRasterizerState(&desc, &bd->pRasterizerState); - } - - // Create depth-stencil State - { - D3D11_DEPTH_STENCIL_DESC desc; - ZeroMemory(&desc, sizeof(desc)); - desc.DepthEnable = false; - desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; - desc.DepthFunc = D3D11_COMPARISON_ALWAYS; - desc.StencilEnable = false; - desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; - desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; - desc.BackFace = desc.FrontFace; - bd->pd3dDevice->CreateDepthStencilState(&desc, &bd->pDepthStencilState); - } - - return ImGui_ImplDX11_CreateFontsTexture(); -} - -void ImGui_ImplDX11_InvalidateDeviceObjects() -{ - ImGui_ImplDX11_Data* bd = ImGui_ImplDX11_GetBackendData(); - if (!bd->pd3dDevice) - return; - - if (bd->pFontSampler) { bd->pFontSampler->Release(); bd->pFontSampler = NULL; } - if (bd->FontTexture) { bd->FontTexture.Destroy(); ImGui::GetIO().Fonts->SetTexID(NULL); } // We copied data->pFontTextureView to io.Fonts->TexID so let's clear that as well. 
- if (bd->pIB) { bd->pIB->Release(); bd->pIB = NULL; } - if (bd->pVB) { bd->pVB->Release(); bd->pVB = NULL; } - if (bd->pBlendState) { bd->pBlendState->Release(); bd->pBlendState = NULL; } - if (bd->pDepthStencilState) { bd->pDepthStencilState->Release(); bd->pDepthStencilState = NULL; } - if (bd->pRasterizerState) { bd->pRasterizerState->Release(); bd->pRasterizerState = NULL; } - if (bd->pPixelShader) { bd->pPixelShader->Release(); bd->pPixelShader = NULL; } - if (bd->pVertexConstantBuffer) { bd->pVertexConstantBuffer->Release(); bd->pVertexConstantBuffer = NULL; } - if (bd->pInputLayout) { bd->pInputLayout->Release(); bd->pInputLayout = NULL; } - if (bd->pVertexShader) { bd->pVertexShader->Release(); bd->pVertexShader = NULL; } -} - -bool ImGui_ImplDX11_Init(ID3D11Device* device, ID3D11DeviceContext* device_context) -{ - ImGuiIO& io = ImGui::GetIO(); - IM_ASSERT(io.BackendRendererUserData == NULL && "Already initialized a renderer backend!"); - - // Setup backend capabilities flags - ImGui_ImplDX11_Data* bd = IM_NEW(ImGui_ImplDX11_Data)(); - io.BackendRendererUserData = (void*)bd; - io.BackendRendererName = "imgui_impl_dx11"; - io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. 
- - // Get factory from device - IDXGIDevice* pDXGIDevice = NULL; - IDXGIAdapter* pDXGIAdapter = NULL; - IDXGIFactory* pFactory = NULL; - - if (device->QueryInterface(IID_PPV_ARGS(&pDXGIDevice)) == S_OK) - if (pDXGIDevice->GetParent(IID_PPV_ARGS(&pDXGIAdapter)) == S_OK) - if (pDXGIAdapter->GetParent(IID_PPV_ARGS(&pFactory)) == S_OK) - { - bd->pd3dDevice = device; - bd->pd3dDeviceContext = device_context; - bd->pFactory = pFactory; - } - if (pDXGIDevice) pDXGIDevice->Release(); - if (pDXGIAdapter) pDXGIAdapter->Release(); - bd->pd3dDevice->AddRef(); - bd->pd3dDeviceContext->AddRef(); - - return ImGui_ImplDX11_CreateDeviceObjects(); -} - -void ImGui_ImplDX11_Shutdown() -{ - ImGui_ImplDX11_Data* bd = ImGui_ImplDX11_GetBackendData(); - IM_ASSERT(bd != NULL && "No renderer backend to shutdown, or already shutdown?"); - if (bd == NULL) - return; - - ImGui_ImplDX11_InvalidateDeviceObjects(); - if (bd->pFactory) { bd->pFactory->Release(); } - if (bd->pd3dDevice) { bd->pd3dDevice->Release(); } - if (bd->pd3dDeviceContext) { bd->pd3dDeviceContext->Release(); } - - ImGuiIO& io = ImGui::GetIO(); - io.BackendRendererName = NULL; - io.BackendRendererUserData = NULL; - IM_DELETE(bd); -} diff --git a/src/util/imgui_impl_dx11.h b/src/util/imgui_impl_dx11.h deleted file mode 100644 index 9eb888935..000000000 --- a/src/util/imgui_impl_dx11.h +++ /dev/null @@ -1,17 +0,0 @@ -// dear imgui: Renderer Backend for DirectX11 -// This needs to be used along with a Platform Backend (e.g. Win32) - -#pragma once -#include "imgui.h" // IMGUI_IMPL_API - -struct ID3D11Device; -struct ID3D11DeviceContext; - -bool ImGui_ImplDX11_Init(ID3D11Device* device, ID3D11DeviceContext* device_context); -void ImGui_ImplDX11_Shutdown(); -void ImGui_ImplDX11_RenderDrawData(ImDrawData* draw_data); - -// Use if you want to reset your rendering device without losing Dear ImGui state. 
-void ImGui_ImplDX11_InvalidateDeviceObjects(); -bool ImGui_ImplDX11_CreateDeviceObjects(); -bool ImGui_ImplDX11_CreateFontsTexture(); diff --git a/src/util/imgui_impl_dx12.cpp b/src/util/imgui_impl_dx12.cpp deleted file mode 100644 index f1e286eec..000000000 --- a/src/util/imgui_impl_dx12.cpp +++ /dev/null @@ -1,545 +0,0 @@ -// dear imgui: Renderer Backend for DirectX12 -// This needs to be used along with a Platform Backend (e.g. Win32) - -// Implemented features: -// [X] Renderer: User texture binding. Use 'D3D12_GPU_DESCRIPTOR_HANDLE' as ImTextureID. Read the FAQ about ImTextureID! -// [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices. - -// Important: to compile on 32-bit systems, this backend requires code to be compiled with '#define ImTextureID ImU64'. -// This is because we need ImTextureID to carry a 64-bit value and by default ImTextureID is defined as void*. -// To build this on 32-bit systems: -// - [Solution 1] IDE/msbuild: in "Properties/C++/Preprocessor Definitions" add 'ImTextureID=ImU64' (this is what we do in the 'example_win32_direct12/example_win32_direct12.vcxproj' project file) -// - [Solution 2] IDE/msbuild: in "Properties/C++/Preprocessor Definitions" add 'IMGUI_USER_CONFIG="my_imgui_config.h"' and inside 'my_imgui_config.h' add '#define ImTextureID ImU64' and as many other options as you like. -// - [Solution 3] IDE/msbuild: edit imconfig.h and add '#define ImTextureID ImU64' (prefer solution 2 to create your own config file!) -// - [Solution 4] command-line: add '/D ImTextureID=ImU64' to your cl.exe command-line (this is what we do in the example_win32_direct12/build_win32.bat file) - -// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. -// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. 
-// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. -// Read online: https://github.com/ocornut/imgui/tree/master/docs - -// CHANGELOG -// (minor and older changes stripped away, please see git history for details) -// 2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX). -// 2021-05-19: DirectX12: Replaced direct access to ImDrawCmd::TextureId with a call to ImDrawCmd::GetTexID(). (will become a requirement) -// 2021-02-18: DirectX12: Change blending equation to preserve alpha in output buffer. -// 2021-01-11: DirectX12: Improve Windows 7 compatibility (for D3D12On7) by loading d3d12.dll dynamically. -// 2020-09-16: DirectX12: Avoid rendering calls with zero-sized scissor rectangle since it generates a validation layer warning. -// 2020-09-08: DirectX12: Clarified support for building on 32-bit systems by redefining ImTextureID. -// 2019-10-18: DirectX12: *BREAKING CHANGE* Added extra ID3D12DescriptorHeap parameter to ImGui_ImplDX12_Init() function. -// 2019-05-29: DirectX12: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. -// 2019-04-30: DirectX12: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. -// 2019-03-29: Misc: Various minor tidying up. -// 2018-12-03: Misc: Added #pragma comment statement to automatically link with d3dcompiler.lib when using D3DCompile(). -// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. -// 2018-06-12: DirectX12: Moved the ID3D12GraphicsCommandList* parameter from NewFrame() to RenderDrawData(). -// 2018-06-08: Misc: Extracted imgui_impl_dx12.cpp/.h away from the old combined DX12+Win32 example. 
-// 2018-06-08: DirectX12: Use draw_data->DisplayPos and draw_data->DisplaySize to setup projection matrix and clipping rectangle (to ease support for future multi-viewport). -// 2018-02-22: Merged into master with all Win32 code synchronized to other examples. - -#include "common/windows_headers.h" - -#include "common/assert.h" -#include "common/d3d12/context.h" -#include "common/d3d12/texture.h" -#include "common/d3d12/stream_buffer.h" - -#include "imgui.h" -#include "imgui_impl_dx12.h" - -// DirectX -#include -#include -#include -#ifdef _MSC_VER -#pragma comment(lib, "d3dcompiler") // Automatically link with d3dcompiler.lib as we are using D3DCompile() below. -#endif - -// If we're doing more than this... wtf? -static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; -static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; - -struct ImGui_ImplDX12_Data -{ - D3D12::StreamBuffer VertexStreamBuffer; - D3D12::StreamBuffer IndexStreamBuffer; - D3D12::Texture FontTexture; - ID3D12RootSignature* pRootSignature = nullptr; - ID3D12PipelineState* pPipelineState = nullptr; - DXGI_FORMAT RTVFormat = DXGI_FORMAT_UNKNOWN; -}; - -struct VERTEX_CONSTANT_BUFFER -{ - float mvp[4][4]; -}; - -// Backend data stored in io.BackendRendererUserData to allow support for multiple Dear ImGui contexts -// It is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead of multiple Dear ImGui contexts. -static ImGui_ImplDX12_Data* ImGui_ImplDX12_GetBackendData() -{ - return ImGui::GetCurrentContext() ? 
(ImGui_ImplDX12_Data*)ImGui::GetIO().BackendRendererUserData : NULL; -} - -// Functions -static void ImGui_ImplDX12_SetupRenderState(ImDrawData* draw_data, ID3D12GraphicsCommandList* ctx) -{ - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - - // Setup orthographic projection matrix into our constant buffer - // Our visible imgui space lies from draw_data->DisplayPos (top left) to draw_data->DisplayPos+data_data->DisplaySize (bottom right). - VERTEX_CONSTANT_BUFFER vertex_constant_buffer; - { - float L = draw_data->DisplayPos.x; - float R = draw_data->DisplayPos.x + draw_data->DisplaySize.x; - float T = draw_data->DisplayPos.y; - float B = draw_data->DisplayPos.y + draw_data->DisplaySize.y; - float mvp[4][4] = - { - { 2.0f/(R-L), 0.0f, 0.0f, 0.0f }, - { 0.0f, 2.0f/(T-B), 0.0f, 0.0f }, - { 0.0f, 0.0f, 0.5f, 0.0f }, - { (R+L)/(L-R), (T+B)/(B-T), 0.5f, 1.0f }, - }; - memcpy(&vertex_constant_buffer.mvp, mvp, sizeof(mvp)); - } - - // Setup viewport - D3D12_VIEWPORT vp; - memset(&vp, 0, sizeof(D3D12_VIEWPORT)); - vp.Width = draw_data->DisplaySize.x; - vp.Height = draw_data->DisplaySize.y; - vp.MinDepth = 0.0f; - vp.MaxDepth = 1.0f; - vp.TopLeftX = vp.TopLeftY = 0.0f; - ctx->RSSetViewports(1, &vp); - - // Bind shader and vertex buffers - unsigned int stride = sizeof(ImDrawVert); - D3D12_VERTEX_BUFFER_VIEW vbv; - memset(&vbv, 0, sizeof(D3D12_VERTEX_BUFFER_VIEW)); - vbv.BufferLocation = bd->VertexStreamBuffer.GetCurrentGPUPointer(); - vbv.SizeInBytes = bd->VertexStreamBuffer.GetCurrentSpace(); - vbv.StrideInBytes = stride; - ctx->IASetVertexBuffers(0, 1, &vbv); - D3D12_INDEX_BUFFER_VIEW ibv; - memset(&ibv, 0, sizeof(D3D12_INDEX_BUFFER_VIEW)); - ibv.BufferLocation = bd->IndexStreamBuffer.GetCurrentGPUPointer(); - ibv.SizeInBytes = bd->IndexStreamBuffer.GetCurrentSpace(); - ibv.Format = sizeof(ImDrawIdx) == 2 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; - ctx->IASetIndexBuffer(&ibv); - ctx->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - ctx->SetPipelineState(bd->pPipelineState); - ctx->SetGraphicsRootSignature(bd->pRootSignature); - ctx->SetGraphicsRoot32BitConstants(0, 16, &vertex_constant_buffer, 0); - - // Setup blend factor - const float blend_factor[4] = { 0.f, 0.f, 0.f, 0.f }; - ctx->OMSetBlendFactor(blend_factor); -} - -template -static inline void SafeRelease(T*& res) -{ - if (res) - res->Release(); - res = NULL; -} - -// Render function -void ImGui_ImplDX12_RenderDrawData(ImDrawData* draw_data) -{ - // Avoid rendering when minimized - if (draw_data->DisplaySize.x <= 0.0f || draw_data->DisplaySize.y <= 0.0f) - return; - - // FIXME: I'm assuming that this only gets called once per frame! - // If not, we can't just re-allocate the IB or VB, we'll have to do a proper allocator. - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - - const u32 needed_vb = draw_data->TotalVtxCount * sizeof(ImDrawVert); - const u32 needed_ib = draw_data->TotalIdxCount * sizeof(ImDrawIdx); - - if (!bd->VertexStreamBuffer.ReserveMemory(needed_vb, sizeof(ImDrawVert)) || - !bd->IndexStreamBuffer.ReserveMemory(needed_ib, sizeof(ImDrawIdx))) - { - g_d3d12_context->ExecuteCommandList(false); - if (!bd->VertexStreamBuffer.ReserveMemory(needed_vb, sizeof(ImDrawVert)) || - !bd->IndexStreamBuffer.ReserveMemory(needed_ib, sizeof(ImDrawIdx))) - { - Panic("Failed to allocate space for imgui vertices/indices"); - } - } - - // Upload vertex/index data into a single contiguous GPU buffer - ImDrawVert* vtx_dst = (ImDrawVert*)bd->VertexStreamBuffer.GetCurrentHostPointer(); - ImDrawIdx* idx_dst = (ImDrawIdx*)bd->IndexStreamBuffer.GetCurrentHostPointer(); - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - memcpy(vtx_dst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); - memcpy(idx_dst, 
cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); - vtx_dst += cmd_list->VtxBuffer.Size; - idx_dst += cmd_list->IdxBuffer.Size; - } - - // Setup desired DX state (must happen before commit, because it uses the offsets) - ID3D12GraphicsCommandList* ctx = g_d3d12_context->GetCommandList(); - ImGui_ImplDX12_SetupRenderState(draw_data, ctx); - bd->VertexStreamBuffer.CommitMemory(needed_vb); - bd->IndexStreamBuffer.CommitMemory(needed_ib); - - // Render command lists - // (Because we merged all buffers into a single one, we maintain our own offset into them) - int global_vtx_offset = 0; - int global_idx_offset = 0; - ImVec2 clip_off = draw_data->DisplayPos; - const D3D12::Texture* last_texture = nullptr; - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) - { - const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; - if (pcmd->UserCallback != NULL) - { - // User callback, registered via ImDrawList::AddCallback() - // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) 
- if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) - ImGui_ImplDX12_SetupRenderState(draw_data, ctx); - else - pcmd->UserCallback(cmd_list, pcmd); - } - else - { - // Project scissor/clipping rectangles into framebuffer space - ImVec2 clip_min(pcmd->ClipRect.x - clip_off.x, pcmd->ClipRect.y - clip_off.y); - ImVec2 clip_max(pcmd->ClipRect.z - clip_off.x, pcmd->ClipRect.w - clip_off.y); - if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y) - continue; - - // Apply Scissor/clipping rectangle, Bind texture, Draw - const D3D12_RECT r = { (LONG)clip_min.x, (LONG)clip_min.y, (LONG)clip_max.x, (LONG)clip_max.y }; - - const D3D12::Texture* tex = (D3D12::Texture*)pcmd->GetTexID(); - if (tex && last_texture != tex) - { -#if 0 - // for when we redo the descriptor stuff - D3D12::DescriptorHandle handle; - if (!g_d3d12_context->GetDescriptorAllocator().Allocate(1, &handle)) - { - // ugh. - g_d3d12_context->ExecuteCommandList(false); - ctx = g_d3d12_context->GetCommandList(); - ImGui_ImplDX12_SetupRenderState(draw_data, ctx); - if (!g_d3d12_context->GetDescriptorAllocator().Allocate(1, &handle)) - Panic("Failed to allocate descriptor after cmdlist kick"); - } - - g_d3d12_context->GetDevice()->CopyDescriptorsSimple(1, handle, tex->GetSRVDescriptor(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - ctx->SetGraphicsRootDescriptorTable(1, handle); -#else - ctx->SetGraphicsRootDescriptorTable(1, tex->GetSRVDescriptor()); -#endif - last_texture = tex; - } - - ctx->RSSetScissorRects(1, &r); - ctx->DrawIndexedInstanced(pcmd->ElemCount, 1, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset, 0); - } - } - global_idx_offset += cmd_list->IdxBuffer.Size; - global_vtx_offset += cmd_list->VtxBuffer.Size; - } -} - -bool ImGui_ImplDX12_CreateFontsTexture() -{ - // Build texture atlas - ImGuiIO& io = ImGui::GetIO(); - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - unsigned char* pixels; - int width, height; - io.Fonts->GetTexDataAsRGBA32(&pixels, 
&width, &height); - - // Upload texture to graphics system - if (bd->FontTexture.GetWidth() != static_cast(width) || bd->FontTexture.GetHeight() != static_cast(height)) - { - if (!bd->FontTexture.Create(width, height, 1, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_FLAG_NONE)) - { - return false; - } - } - -#if 0 - if (!bd->FontTexture.LoadData(g_d3d12_context->GetInitCommandList(), 0, 0, 0, width, height, pixels, width * sizeof(u32))) - return false; -#else - if (!bd->FontTexture.LoadData(0, 0, width, height, pixels, width * sizeof(u32))) - return false; -#endif - - io.Fonts->SetTexID((ImTextureID)&bd->FontTexture); - return true; -} - -bool ImGui_ImplDX12_CreateDeviceObjects() -{ - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - if (bd->pPipelineState) - ImGui_ImplDX12_DestroyDeviceObjects(); - - // Create the root signature - { - D3D12_DESCRIPTOR_RANGE descRange = {}; - descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - descRange.NumDescriptors = 1; - descRange.BaseShaderRegister = 0; - descRange.RegisterSpace = 0; - descRange.OffsetInDescriptorsFromTableStart = 0; - - D3D12_ROOT_PARAMETER param[2] = {}; - - param[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - param[0].Constants.ShaderRegister = 0; - param[0].Constants.RegisterSpace = 0; - param[0].Constants.Num32BitValues = 16; - param[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; - - param[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - param[1].DescriptorTable.NumDescriptorRanges = 1; - param[1].DescriptorTable.pDescriptorRanges = &descRange; - param[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - - // Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling. 
- D3D12_STATIC_SAMPLER_DESC staticSampler = {}; - staticSampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; - staticSampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - staticSampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - staticSampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - staticSampler.MipLODBias = 0.f; - staticSampler.MaxAnisotropy = 0; - staticSampler.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - staticSampler.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - staticSampler.MinLOD = 0.f; - staticSampler.MaxLOD = 0.f; - staticSampler.ShaderRegister = 0; - staticSampler.RegisterSpace = 0; - staticSampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - - D3D12_ROOT_SIGNATURE_DESC desc = {}; - desc.NumParameters = _countof(param); - desc.pParameters = param; - desc.NumStaticSamplers = 1; - desc.pStaticSamplers = &staticSampler; - desc.Flags = - D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | - D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS; - - auto blob = g_d3d12_context->SerializeRootSignature(&desc); - if (!blob) - return false; - - g_d3d12_context->GetDevice()->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&bd->pRootSignature)); - } - - // By using D3DCompile() from / d3dcompiler.lib, we introduce a dependency to a given version of d3dcompiler_XX.dll (see D3DCOMPILER_DLL_A) - // If you would like to use this DX12 sample code but remove this dependency you can: - // 1) compile once, save the compiled shader blobs into a file or source code and pass them to CreateVertexShader()/CreatePixelShader() [preferred solution] - // 2) use code to detect any version of the DLL and grab a pointer to D3DCompile from the DLL. - // See https://github.com/ocornut/imgui/pull/638 for sources and details. 
- - D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc; - memset(&psoDesc, 0, sizeof(D3D12_GRAPHICS_PIPELINE_STATE_DESC)); - psoDesc.NodeMask = 1; - psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - psoDesc.pRootSignature = bd->pRootSignature; - psoDesc.SampleMask = UINT_MAX; - psoDesc.NumRenderTargets = 1; - psoDesc.RTVFormats[0] = bd->RTVFormat; - psoDesc.SampleDesc.Count = 1; - psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; - - ID3DBlob* vertexShaderBlob; - ID3DBlob* pixelShaderBlob; - - // Create the vertex shader - { - static const char* vertexShader = - "cbuffer vertexBuffer : register(b0) \ - {\ - float4x4 ProjectionMatrix; \ - };\ - struct VS_INPUT\ - {\ - float2 pos : POSITION;\ - float4 col : COLOR0;\ - float2 uv : TEXCOORD0;\ - };\ - \ - struct PS_INPUT\ - {\ - float4 pos : SV_POSITION;\ - float4 col : COLOR0;\ - float2 uv : TEXCOORD0;\ - };\ - \ - PS_INPUT main(VS_INPUT input)\ - {\ - PS_INPUT output;\ - output.pos = mul( ProjectionMatrix, float4(input.pos.xy, 0.f, 1.f));\ - output.col = input.col;\ - output.uv = input.uv;\ - return output;\ - }"; - - if (FAILED(D3DCompile(vertexShader, strlen(vertexShader), NULL, NULL, NULL, "main", "vs_5_0", 0, 0, &vertexShaderBlob, NULL))) - return false; // NB: Pass ID3D10Blob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob! 
- psoDesc.VS = { vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize() }; - - // Create the input layout - static D3D12_INPUT_ELEMENT_DESC local_layout[] = - { - { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, - { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, uv), D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, - { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)IM_OFFSETOF(ImDrawVert, col), D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, - }; - psoDesc.InputLayout = { local_layout, 3 }; - } - - // Create the pixel shader - { - static const char* pixelShader = - "struct PS_INPUT\ - {\ - float4 pos : SV_POSITION;\ - float4 col : COLOR0;\ - float2 uv : TEXCOORD0;\ - };\ - SamplerState sampler0 : register(s0);\ - Texture2D texture0 : register(t0);\ - \ - float4 main(PS_INPUT input) : SV_Target\ - {\ - float4 out_col = input.col * texture0.Sample(sampler0, input.uv); \ - return out_col; \ - }"; - - if (FAILED(D3DCompile(pixelShader, strlen(pixelShader), NULL, NULL, NULL, "main", "ps_5_0", 0, 0, &pixelShaderBlob, NULL))) - { - vertexShaderBlob->Release(); - return false; // NB: Pass ID3D10Blob* pErrorBlob to D3DCompile() to get error showing in (const char*)pErrorBlob->GetBufferPointer(). Make sure to Release() the blob! 
- } - psoDesc.PS = { pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize() }; - } - - // Create the blending setup - { - D3D12_BLEND_DESC& desc = psoDesc.BlendState; - desc.AlphaToCoverageEnable = false; - desc.RenderTarget[0].BlendEnable = true; - desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; - desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; - desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; - desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; - desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; - desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; - desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - } - - // Create the rasterizer state - { - D3D12_RASTERIZER_DESC& desc = psoDesc.RasterizerState; - desc.FillMode = D3D12_FILL_MODE_SOLID; - desc.CullMode = D3D12_CULL_MODE_NONE; - desc.FrontCounterClockwise = FALSE; - desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; - desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; - desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; - desc.DepthClipEnable = true; - desc.MultisampleEnable = FALSE; - desc.AntialiasedLineEnable = FALSE; - desc.ForcedSampleCount = 0; - desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; - } - - // Create depth-stencil State - { - D3D12_DEPTH_STENCIL_DESC& desc = psoDesc.DepthStencilState; - desc.DepthEnable = false; - desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; - desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - desc.StencilEnable = false; - desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; - desc.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; - desc.BackFace = desc.FrontFace; - } - - HRESULT result_pipeline_state = g_d3d12_context->GetDevice()->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&bd->pPipelineState)); - vertexShaderBlob->Release(); - pixelShaderBlob->Release(); - if (result_pipeline_state 
!= S_OK) - return false; - - return true; -} - -void ImGui_ImplDX12_DestroyDeviceObjects() -{ - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - if (!bd) - return; - ImGuiIO& io = ImGui::GetIO(); - - SafeRelease(bd->pRootSignature); - SafeRelease(bd->pPipelineState); - bd->FontTexture.Destroy(false); - bd->VertexStreamBuffer.Destroy(false); - bd->IndexStreamBuffer.Destroy(false); - io.Fonts->SetTexID(NULL); // We copied bd->pFontTextureView to io.Fonts->TexID so let's clear that as well. -} - -bool ImGui_ImplDX12_Init(DXGI_FORMAT rtv_format) -{ - ImGuiIO& io = ImGui::GetIO(); - IM_ASSERT(io.BackendRendererUserData == NULL && "Already initialized a renderer backend!"); - - // Setup backend capabilities flags - ImGui_ImplDX12_Data* bd = IM_NEW(ImGui_ImplDX12_Data)(); - io.BackendRendererUserData = (void*)bd; - io.BackendRendererName = "imgui_impl_dx12"; - io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. 
- - bd->RTVFormat = rtv_format; - - if (!bd->VertexStreamBuffer.Create(VERTEX_BUFFER_SIZE) || !bd->IndexStreamBuffer.Create(INDEX_BUFFER_SIZE)) - return false; - - return ImGui_ImplDX12_CreateDeviceObjects(); -} - -void ImGui_ImplDX12_Shutdown() -{ - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - IM_ASSERT(bd != NULL && "No renderer backend to shutdown, or already shutdown?"); - ImGuiIO& io = ImGui::GetIO(); - - ImGui_ImplDX12_DestroyDeviceObjects(); - io.BackendRendererName = NULL; - io.BackendRendererUserData = NULL; - IM_DELETE(bd); -} - -void ImGui_ImplDX12_NewFrame() -{ - ImGui_ImplDX12_Data* bd = ImGui_ImplDX12_GetBackendData(); - IM_ASSERT(bd != NULL && "Did you call ImGui_ImplDX12_Init()?"); - - if (!bd->pPipelineState) - ImGui_ImplDX12_CreateDeviceObjects(); -} diff --git a/src/util/imgui_impl_dx12.h b/src/util/imgui_impl_dx12.h deleted file mode 100644 index 6776da374..000000000 --- a/src/util/imgui_impl_dx12.h +++ /dev/null @@ -1,14 +0,0 @@ -// dear imgui: Renderer Backend for DirectX12 -// This needs to be used along with a Platform Backend (e.g. Win32) - -#pragma once -#include "imgui.h" // IMGUI_IMPL_API - -bool ImGui_ImplDX12_Init(DXGI_FORMAT rtv_format); -void ImGui_ImplDX12_Shutdown(); -void ImGui_ImplDX12_RenderDrawData(ImDrawData* draw_data); - -// Use if you want to reset your rendering device without losing Dear ImGui state. -void ImGui_ImplDX12_DestroyDeviceObjects(); -bool ImGui_ImplDX12_CreateDeviceObjects(); -bool ImGui_ImplDX12_CreateFontsTexture(); diff --git a/src/util/imgui_impl_opengl3.cpp b/src/util/imgui_impl_opengl3.cpp deleted file mode 100644 index ab606b90f..000000000 --- a/src/util/imgui_impl_opengl3.cpp +++ /dev/null @@ -1,569 +0,0 @@ -// dear imgui: Renderer Backend for modern OpenGL with shaders / programmatic pipeline -// - Desktop GL: 2.x 3.x 4.x -// - Embedded GL: ES 2.0 (WebGL 1.0), ES 3.0 (WebGL 2.0) -// This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..) 
- -// Implemented features: -// [X] Renderer: User texture binding. Use 'GLuint' OpenGL texture identifier as void*/ImTextureID. Read the FAQ about ImTextureID! -// [x] Renderer: Desktop GL only: Support for large meshes (64k+ vertices) with 16-bit indices. - -// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. -// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. -// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. -// Read online: https://github.com/ocornut/imgui/tree/master/docs - -// CHANGELOG -// (minor and older changes stripped away, please see git history for details) -// 2022-05-13: OpenGL: Fix state corruption on OpenGL ES 2.0 due to not preserving GL_ELEMENT_ARRAY_BUFFER_BINDING and vertex attribute states. -// 2021-12-15: OpenGL: Using buffer orphaning + glBufferSubData(), seems to fix leaks with multi-viewports with some Intel HD drivers. -// 2021-08-23: OpenGL: Fixed ES 3.0 shader ("#version 300 es") use normal precision floats to avoid wobbly rendering at HD resolutions. -// 2021-08-19: OpenGL: Embed and use our own minimal GL loader (imgui_impl_opengl3_loader.h), removing requirement and support for third-party loader. -// 2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX). -// 2021-06-25: OpenGL: Use OES_vertex_array extension on Emscripten + backup/restore current state. -// 2021-06-21: OpenGL: Destroy individual vertex/fragment shader objects right after they are linked into the main shader. -// 2021-05-24: OpenGL: Access GL_CLIP_ORIGIN when "GL_ARB_clip_control" extension is detected, inside of just OpenGL 4.5 version. -// 2021-05-19: OpenGL: Replaced direct access to ImDrawCmd::TextureId with a call to ImDrawCmd::GetTexID(). 
(will become a requirement) -// 2021-04-06: OpenGL: Don't try to read GL_CLIP_ORIGIN unless we're OpenGL 4.5 or greater. -// 2021-02-18: OpenGL: Change blending equation to preserve alpha in output buffer. -// 2021-01-03: OpenGL: Backup, setup and restore GL_STENCIL_TEST state. -// 2020-10-23: OpenGL: Backup, setup and restore GL_PRIMITIVE_RESTART state. -// 2020-10-15: OpenGL: Use glGetString(GL_VERSION) instead of glGetIntegerv(GL_MAJOR_VERSION, ...) when the later returns zero (e.g. Desktop GL 2.x) -// 2020-09-17: OpenGL: Fix to avoid compiling/calling glBindSampler() on ES or pre 3.3 context which have the defines set by a loader. -// 2020-07-10: OpenGL: Added support for glad2 OpenGL loader. -// 2020-05-08: OpenGL: Made default GLSL version 150 (instead of 130) on OSX. -// 2020-04-21: OpenGL: Fixed handling of glClipControl(GL_UPPER_LEFT) by inverting projection matrix. -// 2020-04-12: OpenGL: Fixed context version check mistakenly testing for 4.0+ instead of 3.2+ to enable ImGuiBackendFlags_RendererHasVtxOffset. -// 2020-03-24: OpenGL: Added support for glbinding 2.x OpenGL loader. -// 2020-01-07: OpenGL: Added support for glbinding 3.x OpenGL loader. -// 2019-10-25: OpenGL: Using a combination of GL define and runtime GL version to decide whether to use glDrawElementsBaseVertex(). Fix building with pre-3.2 GL loaders. -// 2019-09-22: OpenGL: Detect default GL loader using __has_include compiler facility. -// 2019-09-16: OpenGL: Tweak initialization code to allow application calling ImGui_ImplOpenGL3_CreateFontsTexture() before the first NewFrame() call. -// 2019-05-29: OpenGL: Desktop GL only: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. -// 2019-04-30: OpenGL: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. -// 2019-03-29: OpenGL: Not calling glBindBuffer more than necessary in the render loop. 
-// 2019-03-15: OpenGL: Added a GL call + comments in ImGui_ImplOpenGL3_Init() to detect uninitialized GL function loaders early. -// 2019-03-03: OpenGL: Fix support for ES 2.0 (WebGL 1.0). -// 2019-02-20: OpenGL: Fix for OSX not supporting OpenGL 4.5, we don't try to read GL_CLIP_ORIGIN even if defined by the headers/loader. -// 2019-02-11: OpenGL: Projecting clipping rectangles correctly using draw_data->FramebufferScale to allow multi-viewports for retina display. -// 2019-02-01: OpenGL: Using GLSL 410 shaders for any version over 410 (e.g. 430, 450). -// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. -// 2018-11-13: OpenGL: Support for GL 4.5's glClipControl(GL_UPPER_LEFT) / GL_CLIP_ORIGIN. -// 2018-08-29: OpenGL: Added support for more OpenGL loaders: glew and glad, with comments indicative that any loader can be used. -// 2018-08-09: OpenGL: Default to OpenGL ES 3 on iOS and Android. GLSL version default to "#version 300 ES". -// 2018-07-30: OpenGL: Support for GLSL 300 ES and 410 core. Fixes for Emscripten compilation. -// 2018-07-10: OpenGL: Support for more GLSL versions (based on the GLSL version string). Added error output when shaders fail to compile/link. -// 2018-06-08: Misc: Extracted imgui_impl_opengl3.cpp/.h away from the old combined GLFW/SDL+OpenGL3 examples. -// 2018-06-08: OpenGL: Use draw_data->DisplayPos and draw_data->DisplaySize to setup projection matrix and clipping rectangle. -// 2018-05-25: OpenGL: Removed unnecessary backup/restore of GL_ELEMENT_ARRAY_BUFFER_BINDING since this is part of the VAO state. -// 2018-05-14: OpenGL: Making the call to glBindSampler() optional so 3.2 context won't fail if the function is a NULL pointer. -// 2018-03-06: OpenGL: Added const char* glsl_version parameter to ImGui_ImplOpenGL3_Init() so user can override the GLSL version e.g. "#version 150". 
-// 2018-02-23: OpenGL: Create the VAO in the render function so the setup can more easily be used with multiple shared GL context. -// 2018-02-16: Misc: Obsoleted the io.RenderDrawListsFn callback and exposed ImGui_ImplSdlGL3_RenderDrawData() in the .h file so you can call it yourself. -// 2018-01-07: OpenGL: Changed GLSL shader version from 330 to 150. -// 2017-09-01: OpenGL: Save and restore current bound sampler. Save and restore current polygon mode. -// 2017-05-01: OpenGL: Fixed save and restore of current blend func state. -// 2017-05-01: OpenGL: Fixed save and restore of current GL_ACTIVE_TEXTURE. -// 2016-09-05: OpenGL: Fixed save and restore of current scissor rectangle. -// 2016-07-29: OpenGL: Explicitly setting GL_UNPACK_ROW_LENGTH to reduce issues because SDL changes it. (#752) - -//---------------------------------------- -// OpenGL GLSL GLSL -// version version string -//---------------------------------------- -// 2.0 110 "#version 110" -// 2.1 120 "#version 120" -// 3.0 130 "#version 130" -// 3.1 140 "#version 140" -// 3.2 150 "#version 150" -// 3.3 330 "#version 330 core" -// 4.0 400 "#version 400 core" -// 4.1 410 "#version 410 core" -// 4.2 420 "#version 410 core" -// 4.3 430 "#version 430 core" -// ES 2.0 100 "#version 100" = WebGL 1.0 -// ES 3.0 300 "#version 300 es" = WebGL 2.0 -//---------------------------------------- - -#include "imgui.h" -#include "imgui_impl_opengl3.h" -#include -#if defined(_MSC_VER) && _MSC_VER <= 1500 // MSVC 2008 or earlier -#include // intptr_t -#else -#include // intptr_t -#endif - -// Clang warnings with -Weverything -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wold-style-cast" // warning: use of old-style cast -#pragma clang diagnostic ignored "-Wsign-conversion" // warning: implicit conversion changes signedness -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#endif - -// GL includes 
-#include "common/gl/loader.h" -#include "common/gl/texture.h" -#include "common/log.h" -Log_SetChannel(ImGui_ImplOpenGL3); - -// OpenGL Data -struct ImGui_ImplOpenGL3_Data -{ - GLuint GlVersion = 0; // Extracted at runtime using GL_MAJOR_VERSION, GL_MINOR_VERSION queries (e.g. 320 for GL 3.2) - char GlslVersionString[32] = {}; // Specified by user or detected based on compile time GL settings. - GL::Texture FontTexture; - GLuint ShaderHandle = 0; - GLint AttribLocationTex = 0; // Uniforms location - GLint AttribLocationProjMtx = 0; - GLuint AttribLocationVtxPos = 0; // Vertex attributes location - GLuint AttribLocationVtxUV = 0; - GLuint AttribLocationVtxColor = 0; - unsigned int VboHandle = 0, ElementsHandle = 0, VaoHandle = 0; - GLsizeiptr VertexBufferSize = 0; - GLsizeiptr IndexBufferSize = 0; - - ImGui_ImplOpenGL3_Data() = default; -}; - -// Backend data stored in io.BackendRendererUserData to allow support for multiple Dear ImGui contexts -// It is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead of multiple Dear ImGui contexts. -static ImGui_ImplOpenGL3_Data* ImGui_ImplOpenGL3_GetBackendData() -{ - return ImGui::GetCurrentContext() ? (ImGui_ImplOpenGL3_Data*)ImGui::GetIO().BackendRendererUserData : NULL; -} - -// Functions -bool ImGui_ImplOpenGL3_Init(const char* glsl_version) -{ - ImGuiIO& io = ImGui::GetIO(); - IM_ASSERT(io.BackendRendererUserData == NULL && "Already initialized a renderer backend!"); - - // Setup backend capabilities flags - ImGui_ImplOpenGL3_Data* bd = IM_NEW(ImGui_ImplOpenGL3_Data)(); - io.BackendRendererUserData = (void*)bd; - io.BackendRendererName = "imgui_impl_opengl3"; - - // Query for GL version (e.g. 320 for GL 3.2) - GLint major = 0; - GLint minor = 0; - glGetIntegerv(GL_MAJOR_VERSION, &major); - glGetIntegerv(GL_MINOR_VERSION, &minor); - if (major == 0 && minor == 0) - { - // Query GL_VERSION in desktop GL 2.x, the string will start with "." 
- const char* gl_version = (const char*)glGetString(GL_VERSION); - sscanf(gl_version, "%d.%d", &major, &minor); - } - bd->GlVersion = (GLuint)(major * 100 + minor * 10); - - // Store GLSL version string so we can refer to it later in case we recreate shaders. - // Note: GLSL version is NOT the same as GL version. Leave this to NULL if unsure. - if (glsl_version == NULL) - glsl_version = "#version 130"; - - IM_ASSERT((int)strlen(glsl_version) + 2 < IM_ARRAYSIZE(bd->GlslVersionString)); - strcpy(bd->GlslVersionString, glsl_version); - strcat(bd->GlslVersionString, "\n"); - - if (glDrawElementsBaseVertex) - io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. - else - Log_WarningPrintf("Missing glDrawElementsBaseVertex()"); - - return ImGui_ImplOpenGL3_CreateDeviceObjects(); -} - -void ImGui_ImplOpenGL3_Shutdown() -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - IM_ASSERT(bd != NULL && "No renderer backend to shutdown, or already shutdown?"); - ImGuiIO& io = ImGui::GetIO(); - - ImGui_ImplOpenGL3_DestroyDeviceObjects(); - io.BackendRendererName = NULL; - io.BackendRendererUserData = NULL; - IM_DELETE(bd); -} - -static void ImGui_ImplOpenGL3_SetupRenderState(ImDrawData* draw_data, int fb_width, int fb_height) -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - - // Setup render state: alpha-blending enabled, no face culling, no depth testing, scissor enabled, polygon fill - glEnable(GL_BLEND); - glBlendEquation(GL_FUNC_ADD); - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); - glEnable(GL_SCISSOR_TEST); - if (glPolygonMode) - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - - // Setup viewport, orthographic projection matrix - // Our visible imgui space lies from draw_data->DisplayPos (top left) to 
draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps. - glViewport(0, 0, (GLsizei)fb_width, (GLsizei)fb_height); - float L = draw_data->DisplayPos.x; - float R = draw_data->DisplayPos.x + draw_data->DisplaySize.x; - float T = draw_data->DisplayPos.y; - float B = draw_data->DisplayPos.y + draw_data->DisplaySize.y; - const float ortho_projection[4][4] = - { - { 2.0f/(R-L), 0.0f, 0.0f, 0.0f }, - { 0.0f, 2.0f/(T-B), 0.0f, 0.0f }, - { 0.0f, 0.0f, -1.0f, 0.0f }, - { (R+L)/(L-R), (T+B)/(B-T), 0.0f, 1.0f }, - }; - glUseProgram(bd->ShaderHandle); - glUniform1i(bd->AttribLocationTex, 0); - glUniformMatrix4fv(bd->AttribLocationProjMtx, 1, GL_FALSE, &ortho_projection[0][0]); - - // Bind vertex/index buffers and setup attributes for ImDrawVert - if (bd->VaoHandle) - glBindVertexArray(bd->VaoHandle); - - glBindBuffer(GL_ARRAY_BUFFER, bd->VboHandle); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, bd->ElementsHandle); - glEnableVertexAttribArray(bd->AttribLocationVtxPos); - glEnableVertexAttribArray(bd->AttribLocationVtxUV); - glEnableVertexAttribArray(bd->AttribLocationVtxColor); - glVertexAttribPointer(bd->AttribLocationVtxPos, 2, GL_FLOAT, GL_FALSE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, pos)); - glVertexAttribPointer(bd->AttribLocationVtxUV, 2, GL_FLOAT, GL_FALSE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, uv)); - glVertexAttribPointer(bd->AttribLocationVtxColor, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(ImDrawVert), (GLvoid*)IM_OFFSETOF(ImDrawVert, col)); -} - -// OpenGL3 Render function. -// Note that this implementation is little overcomplicated because we are saving/setting up/restoring every OpenGL state explicitly. -// This is in order to be able to run within an OpenGL engine that doesn't do so. 
-void ImGui_ImplOpenGL3_RenderDrawData(ImDrawData* draw_data) -{ - // Avoid rendering when minimized, scale coordinates for retina displays (screen coordinates != framebuffer coordinates) - int fb_width = (int)(draw_data->DisplaySize.x * draw_data->FramebufferScale.x); - int fb_height = (int)(draw_data->DisplaySize.y * draw_data->FramebufferScale.y); - if (fb_width <= 0 || fb_height <= 0) - return; - - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - - // Setup desired GL state - ImGui_ImplOpenGL3_SetupRenderState(draw_data, fb_width, fb_height); - - // Will project scissor/clipping rectangles into framebuffer space - ImVec2 clip_off = draw_data->DisplayPos; // (0,0) unless using multi-viewports - ImVec2 clip_scale = draw_data->FramebufferScale; // (1,1) unless using retina display which are often (2,2) - - // Render command lists - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - - // Upload vertex/index buffers - GLsizeiptr vtx_buffer_size = (GLsizeiptr)cmd_list->VtxBuffer.Size * (int)sizeof(ImDrawVert); - GLsizeiptr idx_buffer_size = (GLsizeiptr)cmd_list->IdxBuffer.Size * (int)sizeof(ImDrawIdx); - if (bd->VertexBufferSize < vtx_buffer_size) - { - bd->VertexBufferSize = vtx_buffer_size; - glBufferData(GL_ARRAY_BUFFER, bd->VertexBufferSize, NULL, GL_STREAM_DRAW); - } - if (bd->IndexBufferSize < idx_buffer_size) - { - bd->IndexBufferSize = idx_buffer_size; - glBufferData(GL_ELEMENT_ARRAY_BUFFER, bd->IndexBufferSize, NULL, GL_STREAM_DRAW); - } - glBufferSubData(GL_ARRAY_BUFFER, 0, vtx_buffer_size, (const GLvoid*)cmd_list->VtxBuffer.Data); - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_size, (const GLvoid*)cmd_list->IdxBuffer.Data); - - for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) - { - const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; - if (pcmd->UserCallback != NULL) - { - // User callback, registered via ImDrawList::AddCallback() - // 
(ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) - if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) - ImGui_ImplOpenGL3_SetupRenderState(draw_data, fb_width, fb_height); - else - pcmd->UserCallback(cmd_list, pcmd); - } - else - { - // Project scissor/clipping rectangles into framebuffer space - ImVec2 clip_min((pcmd->ClipRect.x - clip_off.x) * clip_scale.x, (pcmd->ClipRect.y - clip_off.y) * clip_scale.y); - ImVec2 clip_max((pcmd->ClipRect.z - clip_off.x) * clip_scale.x, (pcmd->ClipRect.w - clip_off.y) * clip_scale.y); - if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y) - continue; - - // Apply scissor/clipping rectangle (Y is inverted in OpenGL) - glScissor((int)clip_min.x, (int)((float)fb_height - clip_max.y), (int)(clip_max.x - clip_min.x), (int)(clip_max.y - clip_min.y)); - - // Bind texture, Draw - const GL::Texture* tex = static_cast(pcmd->GetTexID()); - if (tex) - tex->Bind(); - - if (glDrawElementsBaseVertex) - glDrawElementsBaseVertex(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, sizeof(ImDrawIdx) == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT, (void*)(intptr_t)(pcmd->IdxOffset * sizeof(ImDrawIdx)), (GLint)pcmd->VtxOffset); - else - glDrawElements(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, sizeof(ImDrawIdx) == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT, (void*)(intptr_t)(pcmd->IdxOffset * sizeof(ImDrawIdx))); - } - } - } - - if (bd->VaoHandle) - glBindVertexArray(0); -} - -bool ImGui_ImplOpenGL3_CreateFontsTexture() -{ - ImGuiIO& io = ImGui::GetIO(); - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - - // Build texture atlas - unsigned char* pixels; - int width, height; - io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); // Load as RGBA 32-bit (75% of the memory is wasted, but default font is so small) because it is more likely to be compatible with user's existing shaders. 
If your ImTextureId represent a higher-level concept than just a GL texture id, consider calling GetTexDataAsAlpha8() instead to save on GPU memory. - - // Upload texture to graphics system - // (Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling) - bd->FontTexture.Create(width, height, 1, 1, 1, GPUTexture::Format::RGBA8, pixels); - bd->FontTexture.SetLinearFilter(true); - - // Store our identifier - io.Fonts->SetTexID(&bd->FontTexture); - return true; -} - -void ImGui_ImplOpenGL3_DestroyFontsTexture() -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - if (bd->FontTexture.IsValid()) - bd->FontTexture.Destroy(); -} - -// If you get an error please report on github. You may try different GL context version or GLSL version. See GL<>GLSL version table at the top of this file. -static bool CheckShader(GLuint handle, const char* desc) -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - GLint status = 0, log_length = 0; - glGetShaderiv(handle, GL_COMPILE_STATUS, &status); - glGetShaderiv(handle, GL_INFO_LOG_LENGTH, &log_length); - if ((GLboolean)status == GL_FALSE) - fprintf(stderr, "ERROR: ImGui_ImplOpenGL3_CreateDeviceObjects: failed to compile %s! With GLSL: %s\n", desc, bd->GlslVersionString); - if (log_length > 1) - { - ImVector buf; - buf.resize((int)(log_length + 1)); - glGetShaderInfoLog(handle, log_length, NULL, (GLchar*)buf.begin()); - fprintf(stderr, "%s\n", buf.begin()); - } - return (GLboolean)status == GL_TRUE; -} - -// If you get an error please report on GitHub. You may try different GL context version or GLSL version. 
-static bool CheckProgram(GLuint handle, const char* desc) -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - GLint status = 0, log_length = 0; - glGetProgramiv(handle, GL_LINK_STATUS, &status); - glGetProgramiv(handle, GL_INFO_LOG_LENGTH, &log_length); - if ((GLboolean)status == GL_FALSE) - fprintf(stderr, "ERROR: ImGui_ImplOpenGL3_CreateDeviceObjects: failed to link %s! With GLSL %s\n", desc, bd->GlslVersionString); - if (log_length > 1) - { - ImVector buf; - buf.resize((int)(log_length + 1)); - glGetProgramInfoLog(handle, log_length, NULL, (GLchar*)buf.begin()); - fprintf(stderr, "%s\n", buf.begin()); - } - return (GLboolean)status == GL_TRUE; -} - -bool ImGui_ImplOpenGL3_CreateDeviceObjects() -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - - // Parse GLSL version string - int glsl_version = 130; - sscanf(bd->GlslVersionString, "#version %d", &glsl_version); - - const GLchar* vertex_shader_glsl_120 = - "uniform mat4 ProjMtx;\n" - "attribute vec2 Position;\n" - "attribute vec2 UV;\n" - "attribute vec4 Color;\n" - "varying vec2 Frag_UV;\n" - "varying vec4 Frag_Color;\n" - "void main()\n" - "{\n" - " Frag_UV = UV;\n" - " Frag_Color = Color;\n" - " gl_Position = ProjMtx * vec4(Position.xy,0,1);\n" - "}\n"; - - const GLchar* vertex_shader_glsl_130 = - "uniform mat4 ProjMtx;\n" - "in vec2 Position;\n" - "in vec2 UV;\n" - "in vec4 Color;\n" - "out vec2 Frag_UV;\n" - "out vec4 Frag_Color;\n" - "void main()\n" - "{\n" - " Frag_UV = UV;\n" - " Frag_Color = Color;\n" - " gl_Position = ProjMtx * vec4(Position.xy,0,1);\n" - "}\n"; - - const GLchar* vertex_shader_glsl_300_es = - "precision highp float;\n" - "layout (location = 0) in vec2 Position;\n" - "layout (location = 1) in vec2 UV;\n" - "layout (location = 2) in vec4 Color;\n" - "uniform mat4 ProjMtx;\n" - "out vec2 Frag_UV;\n" - "out vec4 Frag_Color;\n" - "void main()\n" - "{\n" - " Frag_UV = UV;\n" - " Frag_Color = Color;\n" - " gl_Position = ProjMtx * 
vec4(Position.xy,0,1);\n" - "}\n"; - - const GLchar* vertex_shader_glsl_410_core = - "layout (location = 0) in vec2 Position;\n" - "layout (location = 1) in vec2 UV;\n" - "layout (location = 2) in vec4 Color;\n" - "uniform mat4 ProjMtx;\n" - "out vec2 Frag_UV;\n" - "out vec4 Frag_Color;\n" - "void main()\n" - "{\n" - " Frag_UV = UV;\n" - " Frag_Color = Color;\n" - " gl_Position = ProjMtx * vec4(Position.xy,0,1);\n" - "}\n"; - - const GLchar* fragment_shader_glsl_120 = - "#ifdef GL_ES\n" - " precision mediump float;\n" - "#endif\n" - "uniform sampler2D Texture;\n" - "varying vec2 Frag_UV;\n" - "varying vec4 Frag_Color;\n" - "void main()\n" - "{\n" - " gl_FragColor = Frag_Color * texture2D(Texture, Frag_UV.st);\n" - "}\n"; - - const GLchar* fragment_shader_glsl_130 = - "uniform sampler2D Texture;\n" - "in vec2 Frag_UV;\n" - "in vec4 Frag_Color;\n" - "out vec4 Out_Color;\n" - "void main()\n" - "{\n" - " Out_Color = Frag_Color * texture(Texture, Frag_UV.st);\n" - "}\n"; - - const GLchar* fragment_shader_glsl_300_es = - "precision mediump float;\n" - "uniform sampler2D Texture;\n" - "in vec2 Frag_UV;\n" - "in vec4 Frag_Color;\n" - "layout (location = 0) out vec4 Out_Color;\n" - "void main()\n" - "{\n" - " Out_Color = Frag_Color * texture(Texture, Frag_UV.st);\n" - "}\n"; - - const GLchar* fragment_shader_glsl_410_core = - "in vec2 Frag_UV;\n" - "in vec4 Frag_Color;\n" - "uniform sampler2D Texture;\n" - "layout (location = 0) out vec4 Out_Color;\n" - "void main()\n" - "{\n" - " Out_Color = Frag_Color * texture(Texture, Frag_UV.st);\n" - "}\n"; - - // Select shaders matching our GLSL versions - const GLchar* vertex_shader = NULL; - const GLchar* fragment_shader = NULL; - if (glsl_version < 130) - { - vertex_shader = vertex_shader_glsl_120; - fragment_shader = fragment_shader_glsl_120; - } - else if (glsl_version >= 410) - { - vertex_shader = vertex_shader_glsl_410_core; - fragment_shader = fragment_shader_glsl_410_core; - } - else if (glsl_version == 300) - { - 
vertex_shader = vertex_shader_glsl_300_es; - fragment_shader = fragment_shader_glsl_300_es; - } - else - { - vertex_shader = vertex_shader_glsl_130; - fragment_shader = fragment_shader_glsl_130; - } - - // Create shaders - const GLchar* vertex_shader_with_version[2] = { bd->GlslVersionString, vertex_shader }; - GLuint vert_handle = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vert_handle, 2, vertex_shader_with_version, NULL); - glCompileShader(vert_handle); - CheckShader(vert_handle, "vertex shader"); - - const GLchar* fragment_shader_with_version[2] = { bd->GlslVersionString, fragment_shader }; - GLuint frag_handle = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(frag_handle, 2, fragment_shader_with_version, NULL); - glCompileShader(frag_handle); - CheckShader(frag_handle, "fragment shader"); - - // Link - bd->ShaderHandle = glCreateProgram(); - glAttachShader(bd->ShaderHandle, vert_handle); - glAttachShader(bd->ShaderHandle, frag_handle); - glLinkProgram(bd->ShaderHandle); - CheckProgram(bd->ShaderHandle, "shader program"); - - glDetachShader(bd->ShaderHandle, vert_handle); - glDetachShader(bd->ShaderHandle, frag_handle); - glDeleteShader(vert_handle); - glDeleteShader(frag_handle); - - bd->AttribLocationTex = glGetUniformLocation(bd->ShaderHandle, "Texture"); - bd->AttribLocationProjMtx = glGetUniformLocation(bd->ShaderHandle, "ProjMtx"); - bd->AttribLocationVtxPos = (GLuint)glGetAttribLocation(bd->ShaderHandle, "Position"); - bd->AttribLocationVtxUV = (GLuint)glGetAttribLocation(bd->ShaderHandle, "UV"); - bd->AttribLocationVtxColor = (GLuint)glGetAttribLocation(bd->ShaderHandle, "Color"); - - // Create buffers - glGenBuffers(1, &bd->VboHandle); - glGenBuffers(1, &bd->ElementsHandle); - - if (glGenVertexArrays) - glGenVertexArrays(1, &bd->VaoHandle); - - return true; -} - -void ImGui_ImplOpenGL3_DestroyDeviceObjects() -{ - ImGui_ImplOpenGL3_Data* bd = ImGui_ImplOpenGL3_GetBackendData(); - if (bd->VaoHandle) { glDeleteVertexArrays(1, &bd->VaoHandle); 
bd->VaoHandle = 0; } - if (bd->VboHandle) { glDeleteBuffers(1, &bd->VboHandle); bd->VboHandle = 0; } - if (bd->ElementsHandle) { glDeleteBuffers(1, &bd->ElementsHandle); bd->ElementsHandle = 0; } - if (bd->ShaderHandle) { glDeleteProgram(bd->ShaderHandle); bd->ShaderHandle = 0; } - ImGui_ImplOpenGL3_DestroyFontsTexture(); -} - -#if defined(__clang__) -#pragma clang diagnostic pop -#endif diff --git a/src/util/imgui_impl_opengl3.h b/src/util/imgui_impl_opengl3.h deleted file mode 100644 index 0549fe3ca..000000000 --- a/src/util/imgui_impl_opengl3.h +++ /dev/null @@ -1,16 +0,0 @@ -// dear imgui: Renderer Backend for modern OpenGL with shaders / programmatic pipeline -// This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..) - -#pragma once -#include "imgui.h" // IMGUI_IMPL_API - -// Backend API -bool ImGui_ImplOpenGL3_Init(const char* glsl_version = NULL); -void ImGui_ImplOpenGL3_Shutdown(); -void ImGui_ImplOpenGL3_RenderDrawData(ImDrawData* draw_data); - -// (Optional) Called by Init/NewFrame/Shutdown -bool ImGui_ImplOpenGL3_CreateFontsTexture(); -void ImGui_ImplOpenGL3_DestroyFontsTexture(); -bool ImGui_ImplOpenGL3_CreateDeviceObjects(); -void ImGui_ImplOpenGL3_DestroyDeviceObjects(); diff --git a/src/util/imgui_impl_vulkan.cpp b/src/util/imgui_impl_vulkan.cpp deleted file mode 100644 index 381e4a96c..000000000 --- a/src/util/imgui_impl_vulkan.cpp +++ /dev/null @@ -1,685 +0,0 @@ -// dear imgui: Renderer Backend for Vulkan -// This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..) - -// Implemented features: -// [X] Renderer: Support for large meshes (64k+ vertices) with 16-bit indices. -// [!] Renderer: User texture binding. Use 'VkDescriptorSet' as ImTextureID. Read the FAQ about ImTextureID! See https://github.com/ocornut/imgui/pull/914 for discussions. - -// Important: on 32-bit systems, user texture binding is only supported if your imconfig file has '#define ImTextureID ImU64'. 
-// This is because we need ImTextureID to carry a 64-bit value and by default ImTextureID is defined as void*. -// To build this on 32-bit systems and support texture changes: -// - [Solution 1] IDE/msbuild: in "Properties/C++/Preprocessor Definitions" add 'ImTextureID=ImU64' (this is what we do in our .vcxproj files) -// - [Solution 2] IDE/msbuild: in "Properties/C++/Preprocessor Definitions" add 'IMGUI_USER_CONFIG="my_imgui_config.h"' and inside 'my_imgui_config.h' add '#define ImTextureID ImU64' and as many other options as you like. -// - [Solution 3] IDE/msbuild: edit imconfig.h and add '#define ImTextureID ImU64' (prefer solution 2 to create your own config file!) -// - [Solution 4] command-line: add '/D ImTextureID=ImU64' to your cl.exe command-line (this is what we do in our batch files) - -// You can use unmodified imgui_impl_* files in your project. See examples/ folder for examples of using this. -// Prefer including the entire imgui/ repository into your project (either as a copy or as a submodule), and only build the backends you need. -// If you are new to Dear ImGui, read documentation from the docs/ folder + read the top of imgui.cpp. -// Read online: https://github.com/ocornut/imgui/tree/master/docs - -// The aim of imgui_impl_vulkan.h/.cpp is to be usable in your engine without any modification. -// IF YOU FEEL YOU NEED TO MAKE ANY CHANGE TO THIS CODE, please share them and your feedback at https://github.com/ocornut/imgui/ - -// Important note to the reader who wish to integrate imgui_impl_vulkan.cpp/.h in their own engine/app. -// - Common ImGui_ImplVulkan_XXX functions and structures are used to interface with imgui_impl_vulkan.cpp/.h. -// You will use those if you want to use this rendering backend in your engine/app. -// - Helper ImGui_ImplVulkanH_XXX functions and structures are only used by this example (main.cpp) and by -// the backend itself (imgui_impl_vulkan.cpp), but should PROBABLY NOT be used by your own engine/app code. 
-// Read comments in imgui_impl_vulkan.h. - -// CHANGELOG -// (minor and older changes stripped away, please see git history for details) -// 2021-10-15: Vulkan: Call vkCmdSetScissor() at the end of render a full-viewport to reduce likehood of issues with people using VK_DYNAMIC_STATE_SCISSOR in their app without calling vkCmdSetScissor() explicitly every frame. -// 2021-06-29: Reorganized backend to pull data from a single structure to facilitate usage with multiple-contexts (all g_XXXX access changed to bd->XXXX). -// 2021-03-22: Vulkan: Fix mapped memory validation error when buffer sizes are not multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize. -// 2021-02-18: Vulkan: Change blending equation to preserve alpha in output buffer. -// 2021-01-27: Vulkan: Added support for custom function load and IMGUI_IMPL_VULKAN_NO_PROTOTYPES by using ImGui_ImplVulkan_LoadFunctions(). -// 2020-11-11: Vulkan: Added support for specifying which subpass to reference during VkPipeline creation. -// 2020-09-07: Vulkan: Added VkPipeline parameter to ImGui_ImplVulkan_RenderDrawData (default to one passed to ImGui_ImplVulkan_Init). -// 2020-05-04: Vulkan: Fixed crash if initial frame has no vertices. -// 2020-04-26: Vulkan: Fixed edge case where render callbacks wouldn't be called if the ImDrawData didn't have vertices. -// 2019-08-01: Vulkan: Added support for specifying multisample count. Set ImGui_ImplVulkan_InitInfo::MSAASamples to one of the VkSampleCountFlagBits values to use, default is non-multisampled as before. -// 2019-05-29: Vulkan: Added support for large mesh (64K+ vertices), enable ImGuiBackendFlags_RendererHasVtxOffset flag. -// 2019-04-30: Vulkan: Added support for special ImDrawCallback_ResetRenderState callback to reset render state. -// 2019-04-04: *BREAKING CHANGE*: Vulkan: Added ImageCount/MinImageCount fields in ImGui_ImplVulkan_InitInfo, required for initialization (was previously a hard #define IMGUI_VK_QUEUED_FRAMES 2). 
Added ImGui_ImplVulkan_SetMinImageCount(). -// 2019-04-04: Vulkan: Added VkInstance argument to ImGui_ImplVulkanH_CreateWindow() optional helper. -// 2019-04-04: Vulkan: Avoid passing negative coordinates to vkCmdSetScissor, which debug validation layers do not like. -// 2019-04-01: Vulkan: Support for 32-bit index buffer (#define ImDrawIdx unsigned int). -// 2019-02-16: Vulkan: Viewport and clipping rectangles correctly using draw_data->FramebufferScale to allow retina display. -// 2018-11-30: Misc: Setting up io.BackendRendererName so it can be displayed in the About Window. -// 2018-08-25: Vulkan: Fixed mishandled VkSurfaceCapabilitiesKHR::maxImageCount=0 case. -// 2018-06-22: Inverted the parameters to ImGui_ImplVulkan_RenderDrawData() to be consistent with other backends. -// 2018-06-08: Misc: Extracted imgui_impl_vulkan.cpp/.h away from the old combined GLFW+Vulkan example. -// 2018-06-08: Vulkan: Use draw_data->DisplayPos and draw_data->DisplaySize to setup projection matrix and clipping rectangle. -// 2018-03-03: Vulkan: Various refactor, created a couple of ImGui_ImplVulkanH_XXX helper that the example can use and that viewport support will use. -// 2018-03-01: Vulkan: Renamed ImGui_ImplVulkan_Init_Info to ImGui_ImplVulkan_InitInfo and fields to match more closely Vulkan terminology. -// 2018-02-16: Misc: Obsoleted the io.RenderDrawListsFn callback, ImGui_ImplVulkan_Render() calls ImGui_ImplVulkan_RenderDrawData() itself. -// 2018-02-06: Misc: Removed call to ImGui::Shutdown() which is not available from 1.60 WIP, user needs to call CreateContext/DestroyContext themselves. -// 2017-05-15: Vulkan: Fix scissor offset being negative. Fix new Vulkan validation warnings. Set required depth member for buffer image copy. -// 2016-11-13: Vulkan: Fix validation layer warnings and errors and redeclare gl_PerVertex. -// 2016-10-18: Vulkan: Add location decorators & change to use structs as in/out in glsl, update embedded spv (produced with glslangValidator -x). 
Null the released resources. -// 2016-08-27: Vulkan: Fix Vulkan example for use when a depth buffer is active. - -#include "imgui_impl_vulkan.h" - -#include "common/vulkan/builders.h" -#include "common/vulkan/context.h" -#include "common/vulkan/texture.h" -#include "common/vulkan/stream_buffer.h" -#include "common/vulkan/util.h" - -#include -#include - -// Visual Studio warnings -#ifdef _MSC_VER -#pragma warning (disable: 4127) // condition expression is constant -#endif - -// If we're doing more than this... wtf? -static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; -static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; - -// Vulkan data -struct ImGui_ImplVulkan_Data -{ - VkRenderPass RenderPass = VK_NULL_HANDLE; - VkPipelineCreateFlags PipelineCreateFlags = 0; - VkDescriptorSetLayout DescriptorSetLayout = VK_NULL_HANDLE; - VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; - VkPipeline Pipeline = VK_NULL_HANDLE; - VkShaderModule ShaderModuleVert = VK_NULL_HANDLE; - VkShaderModule ShaderModuleFrag = VK_NULL_HANDLE; - - VkSampler FontSampler = VK_NULL_HANDLE; - - Vulkan::StreamBuffer VertexStreamBuffer; - Vulkan::StreamBuffer IndexStreamBuffer; - Vulkan::Texture FontTexture; -}; - -// Forward Declarations -static bool ImGui_ImplVulkan_CreateDeviceObjects(); -static void ImGui_ImplVulkan_DestroyDeviceObjects(); - -//----------------------------------------------------------------------------- -// SHADERS -//----------------------------------------------------------------------------- - -// glsl_shader.vert, compiled with: -// # glslangValidator -V -x -o glsl_shader.vert.u32 glsl_shader.vert -/* -#version 450 core -layout(location = 0) in vec2 aPos; -layout(location = 1) in vec2 aUV; -layout(location = 2) in vec4 aColor; -layout(push_constant) uniform uPushConstant { vec2 uScale; vec2 uTranslate; } pc; - -out gl_PerVertex { vec4 gl_Position; }; -layout(location = 0) out struct { vec4 Color; vec2 UV; } Out; - -void main() -{ - Out.Color = aColor; - Out.UV = 
aUV; - gl_Position = vec4(aPos * pc.uScale + pc.uTranslate, 0, 1); -} -*/ -static uint32_t __glsl_shader_vert_spv[] = -{ - 0x07230203,0x00010000,0x00080001,0x0000002e,0x00000000,0x00020011,0x00000001,0x0006000b, - 0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e,0x00000000,0x00000001, - 0x000a000f,0x00000000,0x00000004,0x6e69616d,0x00000000,0x0000000b,0x0000000f,0x00000015, - 0x0000001b,0x0000001c,0x00030003,0x00000002,0x000001c2,0x00040005,0x00000004,0x6e69616d, - 0x00000000,0x00030005,0x00000009,0x00000000,0x00050006,0x00000009,0x00000000,0x6f6c6f43, - 0x00000072,0x00040006,0x00000009,0x00000001,0x00005655,0x00030005,0x0000000b,0x0074754f, - 0x00040005,0x0000000f,0x6c6f4361,0x0000726f,0x00030005,0x00000015,0x00565561,0x00060005, - 0x00000019,0x505f6c67,0x65567265,0x78657472,0x00000000,0x00060006,0x00000019,0x00000000, - 0x505f6c67,0x7469736f,0x006e6f69,0x00030005,0x0000001b,0x00000000,0x00040005,0x0000001c, - 0x736f5061,0x00000000,0x00060005,0x0000001e,0x73755075,0x6e6f4368,0x6e617473,0x00000074, - 0x00050006,0x0000001e,0x00000000,0x61635375,0x0000656c,0x00060006,0x0000001e,0x00000001, - 0x61725475,0x616c736e,0x00006574,0x00030005,0x00000020,0x00006370,0x00040047,0x0000000b, - 0x0000001e,0x00000000,0x00040047,0x0000000f,0x0000001e,0x00000002,0x00040047,0x00000015, - 0x0000001e,0x00000001,0x00050048,0x00000019,0x00000000,0x0000000b,0x00000000,0x00030047, - 0x00000019,0x00000002,0x00040047,0x0000001c,0x0000001e,0x00000000,0x00050048,0x0000001e, - 0x00000000,0x00000023,0x00000000,0x00050048,0x0000001e,0x00000001,0x00000023,0x00000008, - 0x00030047,0x0000001e,0x00000002,0x00020013,0x00000002,0x00030021,0x00000003,0x00000002, - 0x00030016,0x00000006,0x00000020,0x00040017,0x00000007,0x00000006,0x00000004,0x00040017, - 0x00000008,0x00000006,0x00000002,0x0004001e,0x00000009,0x00000007,0x00000008,0x00040020, - 0x0000000a,0x00000003,0x00000009,0x0004003b,0x0000000a,0x0000000b,0x00000003,0x00040015, - 
0x0000000c,0x00000020,0x00000001,0x0004002b,0x0000000c,0x0000000d,0x00000000,0x00040020, - 0x0000000e,0x00000001,0x00000007,0x0004003b,0x0000000e,0x0000000f,0x00000001,0x00040020, - 0x00000011,0x00000003,0x00000007,0x0004002b,0x0000000c,0x00000013,0x00000001,0x00040020, - 0x00000014,0x00000001,0x00000008,0x0004003b,0x00000014,0x00000015,0x00000001,0x00040020, - 0x00000017,0x00000003,0x00000008,0x0003001e,0x00000019,0x00000007,0x00040020,0x0000001a, - 0x00000003,0x00000019,0x0004003b,0x0000001a,0x0000001b,0x00000003,0x0004003b,0x00000014, - 0x0000001c,0x00000001,0x0004001e,0x0000001e,0x00000008,0x00000008,0x00040020,0x0000001f, - 0x00000009,0x0000001e,0x0004003b,0x0000001f,0x00000020,0x00000009,0x00040020,0x00000021, - 0x00000009,0x00000008,0x0004002b,0x00000006,0x00000028,0x00000000,0x0004002b,0x00000006, - 0x00000029,0x3f800000,0x00050036,0x00000002,0x00000004,0x00000000,0x00000003,0x000200f8, - 0x00000005,0x0004003d,0x00000007,0x00000010,0x0000000f,0x00050041,0x00000011,0x00000012, - 0x0000000b,0x0000000d,0x0003003e,0x00000012,0x00000010,0x0004003d,0x00000008,0x00000016, - 0x00000015,0x00050041,0x00000017,0x00000018,0x0000000b,0x00000013,0x0003003e,0x00000018, - 0x00000016,0x0004003d,0x00000008,0x0000001d,0x0000001c,0x00050041,0x00000021,0x00000022, - 0x00000020,0x0000000d,0x0004003d,0x00000008,0x00000023,0x00000022,0x00050085,0x00000008, - 0x00000024,0x0000001d,0x00000023,0x00050041,0x00000021,0x00000025,0x00000020,0x00000013, - 0x0004003d,0x00000008,0x00000026,0x00000025,0x00050081,0x00000008,0x00000027,0x00000024, - 0x00000026,0x00050051,0x00000006,0x0000002a,0x00000027,0x00000000,0x00050051,0x00000006, - 0x0000002b,0x00000027,0x00000001,0x00070050,0x00000007,0x0000002c,0x0000002a,0x0000002b, - 0x00000028,0x00000029,0x00050041,0x00000011,0x0000002d,0x0000001b,0x0000000d,0x0003003e, - 0x0000002d,0x0000002c,0x000100fd,0x00010038 -}; - -// glsl_shader.frag, compiled with: -// # glslangValidator -V -x -o glsl_shader.frag.u32 glsl_shader.frag -/* -#version 450 core 
-layout(location = 0) out vec4 fColor; -layout(set=0, binding=0) uniform sampler2D sTexture; -layout(location = 0) in struct { vec4 Color; vec2 UV; } In; -void main() -{ - fColor = In.Color * texture(sTexture, In.UV.st); -} -*/ -static uint32_t __glsl_shader_frag_spv[] = -{ - 0x07230203,0x00010000,0x00080001,0x0000001e,0x00000000,0x00020011,0x00000001,0x0006000b, - 0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e,0x00000000,0x00000001, - 0x0007000f,0x00000004,0x00000004,0x6e69616d,0x00000000,0x00000009,0x0000000d,0x00030010, - 0x00000004,0x00000007,0x00030003,0x00000002,0x000001c2,0x00040005,0x00000004,0x6e69616d, - 0x00000000,0x00040005,0x00000009,0x6c6f4366,0x0000726f,0x00030005,0x0000000b,0x00000000, - 0x00050006,0x0000000b,0x00000000,0x6f6c6f43,0x00000072,0x00040006,0x0000000b,0x00000001, - 0x00005655,0x00030005,0x0000000d,0x00006e49,0x00050005,0x00000016,0x78655473,0x65727574, - 0x00000000,0x00040047,0x00000009,0x0000001e,0x00000000,0x00040047,0x0000000d,0x0000001e, - 0x00000000,0x00040047,0x00000016,0x00000022,0x00000000,0x00040047,0x00000016,0x00000021, - 0x00000000,0x00020013,0x00000002,0x00030021,0x00000003,0x00000002,0x00030016,0x00000006, - 0x00000020,0x00040017,0x00000007,0x00000006,0x00000004,0x00040020,0x00000008,0x00000003, - 0x00000007,0x0004003b,0x00000008,0x00000009,0x00000003,0x00040017,0x0000000a,0x00000006, - 0x00000002,0x0004001e,0x0000000b,0x00000007,0x0000000a,0x00040020,0x0000000c,0x00000001, - 0x0000000b,0x0004003b,0x0000000c,0x0000000d,0x00000001,0x00040015,0x0000000e,0x00000020, - 0x00000001,0x0004002b,0x0000000e,0x0000000f,0x00000000,0x00040020,0x00000010,0x00000001, - 0x00000007,0x00090019,0x00000013,0x00000006,0x00000001,0x00000000,0x00000000,0x00000000, - 0x00000001,0x00000000,0x0003001b,0x00000014,0x00000013,0x00040020,0x00000015,0x00000000, - 0x00000014,0x0004003b,0x00000015,0x00000016,0x00000000,0x0004002b,0x0000000e,0x00000018, - 
0x00000001,0x00040020,0x00000019,0x00000001,0x0000000a,0x00050036,0x00000002,0x00000004, - 0x00000000,0x00000003,0x000200f8,0x00000005,0x00050041,0x00000010,0x00000011,0x0000000d, - 0x0000000f,0x0004003d,0x00000007,0x00000012,0x00000011,0x0004003d,0x00000014,0x00000017, - 0x00000016,0x00050041,0x00000019,0x0000001a,0x0000000d,0x00000018,0x0004003d,0x0000000a, - 0x0000001b,0x0000001a,0x00050057,0x00000007,0x0000001c,0x00000017,0x0000001b,0x00050085, - 0x00000007,0x0000001d,0x00000012,0x0000001c,0x0003003e,0x00000009,0x0000001d,0x000100fd, - 0x00010038 -}; - -//----------------------------------------------------------------------------- -// FUNCTIONS -//----------------------------------------------------------------------------- - -// Backend data stored in io.BackendRendererUserData to allow support for multiple Dear ImGui contexts -// It is STRONGLY preferred that you use docking branch with multi-viewports (== single Dear ImGui context + multiple windows) instead of multiple Dear ImGui contexts. -// FIXME: multi-context support is not tested and probably dysfunctional in this backend. -static ImGui_ImplVulkan_Data* ImGui_ImplVulkan_GetBackendData() -{ - return ImGui::GetCurrentContext() ? 
(ImGui_ImplVulkan_Data*)ImGui::GetIO().BackendRendererUserData : NULL; -} - -static void ImGui_ImplVulkan_SetupRenderState(ImDrawData* draw_data, VkPipeline pipeline, VkCommandBuffer command_buffer, int fb_width, int fb_height) -{ - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - - // Bind pipeline: - { - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } - - // Bind Vertex And Index Buffer: - if (draw_data->TotalVtxCount > 0) - { - VkBuffer vertex_buffers[1] = { bd->VertexStreamBuffer.GetBuffer() }; - VkDeviceSize vertex_offset[1] = { bd->VertexStreamBuffer.GetCurrentOffset() }; - vkCmdBindVertexBuffers(command_buffer, 0, 1, vertex_buffers, vertex_offset); - vkCmdBindIndexBuffer(command_buffer, bd->IndexStreamBuffer.GetBuffer(), bd->IndexStreamBuffer.GetCurrentOffset(), sizeof(ImDrawIdx) == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); - } - - // Setup viewport: - { - VkViewport viewport; - viewport.x = 0; - viewport.y = 0; - viewport.width = (float)fb_width; - viewport.height = (float)fb_height; - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - vkCmdSetViewport(command_buffer, 0, 1, &viewport); - } - - // Setup scale and translation: - // Our visible imgui space lies from draw_data->DisplayPps (top left) to draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps. 
- { - float scale[2]; - scale[0] = 2.0f / draw_data->DisplaySize.x; - scale[1] = 2.0f / draw_data->DisplaySize.y; - float translate[2]; - translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0]; - translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1]; - vkCmdPushConstants(command_buffer, bd->PipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, sizeof(float) * 0, sizeof(float) * 2, scale); - vkCmdPushConstants(command_buffer, bd->PipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, sizeof(float) * 2, sizeof(float) * 2, translate); - } -} - -// Render function -void ImGui_ImplVulkan_RenderDrawData(ImDrawData* draw_data) -{ - // Avoid rendering when minimized, scale coordinates for retina displays (screen coordinates != framebuffer coordinates) - int fb_width = (int)(draw_data->DisplaySize.x * draw_data->FramebufferScale.x); - int fb_height = (int)(draw_data->DisplaySize.y * draw_data->FramebufferScale.y); - if (fb_width <= 0 || fb_height <= 0) - return; - - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - if (draw_data->TotalVtxCount > 0) - { - // Create or resize the vertex/index buffers - const u32 vertex_size = static_cast(draw_data->TotalVtxCount) * static_cast(sizeof(ImDrawVert)); - const u32 index_size = static_cast(draw_data->TotalIdxCount) * static_cast(sizeof(ImDrawIdx)); - if (!bd->VertexStreamBuffer.ReserveMemory(vertex_size, static_cast(sizeof(ImDrawVert))) || - !bd->IndexStreamBuffer.ReserveMemory(index_size, static_cast(sizeof(ImDrawIdx)))) - { - // this is annoying, because we can't restart the render pass... 
- return; - } - - // Upload vertex/index data into a single contiguous GPU buffer - ImDrawVert* vtx_dst = (ImDrawVert*)bd->VertexStreamBuffer.GetCurrentHostPointer(); - ImDrawIdx* idx_dst = (ImDrawIdx*)bd->IndexStreamBuffer.GetCurrentHostPointer(); - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - memcpy(vtx_dst, cmd_list->VtxBuffer.Data, static_cast(cmd_list->VtxBuffer.Size) * static_cast(sizeof(ImDrawVert))); - memcpy(idx_dst, cmd_list->IdxBuffer.Data, static_cast(cmd_list->IdxBuffer.Size) * static_cast(sizeof(ImDrawIdx))); - vtx_dst += static_cast(cmd_list->VtxBuffer.Size); - idx_dst += static_cast(cmd_list->IdxBuffer.Size); - } - - // Setup desired Vulkan state (must come before buffer commit) - ImGui_ImplVulkan_SetupRenderState(draw_data, bd->Pipeline, g_vulkan_context->GetCurrentCommandBuffer(), fb_width, fb_height); - bd->VertexStreamBuffer.CommitMemory(vertex_size); - bd->IndexStreamBuffer.CommitMemory(index_size); - } - - // Will project scissor/clipping rectangles into framebuffer space - ImVec2 clip_off = draw_data->DisplayPos; // (0,0) unless using multi-viewports - ImVec2 clip_scale = draw_data->FramebufferScale; // (1,1) unless using retina display which are often (2,2) - - // Render command lists - // (Because we merged all buffers into a single one, we maintain our own offset into them) - int global_vtx_offset = 0; - int global_idx_offset = 0; - const Vulkan::Texture* last_texture = nullptr; - VkCommandBuffer command_buffer = g_vulkan_context->GetCurrentCommandBuffer(); - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) - { - const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; - if (pcmd->UserCallback != NULL) - { - // User callback, registered via ImDrawList::AddCallback() - // (ImDrawCallback_ResetRenderState is a special callback value used by the user to 
request the renderer to reset render state.) - if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) - ImGui_ImplVulkan_SetupRenderState(draw_data, bd->Pipeline, command_buffer, fb_width, fb_height); - else - pcmd->UserCallback(cmd_list, pcmd); - } - else - { - // Project scissor/clipping rectangles into framebuffer space - ImVec2 clip_min((pcmd->ClipRect.x - clip_off.x) * clip_scale.x, (pcmd->ClipRect.y - clip_off.y) * clip_scale.y); - ImVec2 clip_max((pcmd->ClipRect.z - clip_off.x) * clip_scale.x, (pcmd->ClipRect.w - clip_off.y) * clip_scale.y); - - // Clamp to viewport as vkCmdSetScissor() won't accept values that are off bounds - if (clip_min.x < 0.0f) { clip_min.x = 0.0f; } - if (clip_min.y < 0.0f) { clip_min.y = 0.0f; } - if (clip_max.x > fb_width) { clip_max.x = (float)fb_width; } - if (clip_max.y > fb_height) { clip_max.y = (float)fb_height; } - if (clip_max.x <= clip_min.x || clip_max.y <= clip_min.y) - continue; - - // Apply scissor/clipping rectangle - VkRect2D scissor; - scissor.offset.x = (int32_t)(clip_min.x); - scissor.offset.y = (int32_t)(clip_min.y); - scissor.extent.width = (uint32_t)(clip_max.x - clip_min.x); - scissor.extent.height = (uint32_t)(clip_max.y - clip_min.y); - vkCmdSetScissor(command_buffer, 0, 1, &scissor); - - // Bind DescriptorSet with font or user texture - const Vulkan::Texture* tex = (const Vulkan::Texture*)pcmd->TextureId; - if (tex && last_texture != tex) - { - // if we can't get a descriptor set, we'll we're in trouble, since we can't restart the render pass from here. 
- VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet(bd->DescriptorSetLayout); - if (ds == VK_NULL_HANDLE) - { - continue; - } - - Vulkan::DescriptorSetUpdateBuilder dsb; - dsb.AddCombinedImageSamplerDescriptorWrite(ds, 0, tex->GetView(), bd->FontSampler); - dsb.Update(g_vulkan_context->GetDevice()); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, bd->PipelineLayout, 0, 1, &ds, 0, nullptr); - last_texture = tex; - } - - // Draw - vkCmdDrawIndexed(command_buffer, pcmd->ElemCount, 1, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset, 0); - } - } - global_idx_offset += cmd_list->IdxBuffer.Size; - global_vtx_offset += cmd_list->VtxBuffer.Size; - } - - // Note: at this point both vkCmdSetViewport() and vkCmdSetScissor() have been called. - // Our last values will leak into user/application rendering IF: - // - Your app uses a pipeline with VK_DYNAMIC_STATE_VIEWPORT or VK_DYNAMIC_STATE_SCISSOR dynamic state - // - And you forgot to call vkCmdSetViewport() and vkCmdSetScissor() yourself to explicitely set that state. - // If you use VK_DYNAMIC_STATE_VIEWPORT or VK_DYNAMIC_STATE_SCISSOR you are responsible for setting the values before rendering. - // In theory we should aim to backup/restore those values but I am not sure this is possible. - // We perform a call to vkCmdSetScissor() to set back a full viewport which is likely to fix things for 99% users but technically this is not perfect. 
(See github #4644) - VkRect2D scissor = { { 0, 0 }, { (uint32_t)fb_width, (uint32_t)fb_height } }; - vkCmdSetScissor(command_buffer, 0, 1, &scissor); -} - -bool ImGui_ImplVulkan_CreateFontsTexture() -{ - ImGuiIO& io = ImGui::GetIO(); - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - - unsigned char* pixels; - int width, height; - io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); - - if (bd->FontTexture.GetWidth() != static_cast(width) || bd->FontTexture.GetHeight() != static_cast(height)) - { - if (!bd->FontTexture.Create(width, height, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT)) - { - return false; - } - } - - // Store our identifier - bd->FontTexture.Update(0, 0, width, height, 0, 0, pixels, sizeof(u32) * width); - bd->FontTexture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - io.Fonts->SetTexID((ImTextureID)&bd->FontTexture); - return true; -} - -static bool ImGui_ImplVulkan_CreateShaderModules(VkDevice device) -{ - // Create the shader modules - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - if (bd->ShaderModuleVert == VK_NULL_HANDLE) - { - VkShaderModuleCreateInfo vert_info = {}; - vert_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - vert_info.codeSize = sizeof(__glsl_shader_vert_spv); - vert_info.pCode = (uint32_t*)__glsl_shader_vert_spv; - VkResult err = vkCreateShaderModule(device, &vert_info, nullptr, &bd->ShaderModuleVert); - if (err != VK_SUCCESS) - return false; - } - if (bd->ShaderModuleFrag == VK_NULL_HANDLE) - { - VkShaderModuleCreateInfo frag_info = {}; - frag_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - frag_info.codeSize = sizeof(__glsl_shader_frag_spv); - frag_info.pCode = (uint32_t*)__glsl_shader_frag_spv; - VkResult err = vkCreateShaderModule(device, &frag_info, nullptr, 
&bd->ShaderModuleFrag); - if (err != VK_SUCCESS) - return false; - } - - return true; -} - -static bool ImGui_ImplVulkan_CreateFontSampler(VkDevice device) -{ - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - if (bd->FontSampler) - return true; - - // Bilinear sampling is required by default. Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling. - VkSamplerCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - info.magFilter = VK_FILTER_LINEAR; - info.minFilter = VK_FILTER_LINEAR; - info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.minLod = -1000; - info.maxLod = 1000; - info.maxAnisotropy = 1.0f; - VkResult err = vkCreateSampler(device, &info, nullptr, &bd->FontSampler); - return (err == VK_SUCCESS); -} - -static bool ImGui_ImplVulkan_CreateDescriptorSetLayout(VkDevice device) -{ - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - if (bd->DescriptorSetLayout) - return true; - - if (!ImGui_ImplVulkan_CreateFontSampler(device)) - return false; - - VkSampler sampler[1] = { bd->FontSampler }; - VkDescriptorSetLayoutBinding binding[1] = {}; - binding[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - binding[0].descriptorCount = 1; - binding[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - binding[0].pImmutableSamplers = sampler; - VkDescriptorSetLayoutCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - info.bindingCount = 1; - info.pBindings = binding; - VkResult err = vkCreateDescriptorSetLayout(device, &info, nullptr, &bd->DescriptorSetLayout); - return (err == VK_SUCCESS); -} - -static bool ImGui_ImplVulkan_CreatePipelineLayout(VkDevice device) -{ - ImGui_ImplVulkan_Data* bd = 
ImGui_ImplVulkan_GetBackendData(); - if (bd->PipelineLayout) - return true; - - // Constants: we are using 'vec2 offset' and 'vec2 scale' instead of a full 3d projection matrix - ImGui_ImplVulkan_CreateDescriptorSetLayout(device); - VkPushConstantRange push_constants[1] = {}; - push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - push_constants[0].offset = sizeof(float) * 0; - push_constants[0].size = sizeof(float) * 4; - VkDescriptorSetLayout set_layout[1] = { bd->DescriptorSetLayout }; - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = set_layout; - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = push_constants; - VkResult err = vkCreatePipelineLayout(device, &layout_info, nullptr, &bd->PipelineLayout); - return (err == VK_SUCCESS); -} - -static bool ImGui_ImplVulkan_CreatePipeline(VkDevice device, VkPipelineCache pipelineCache, VkRenderPass renderPass, VkPipeline* pipeline) -{ - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - if (!ImGui_ImplVulkan_CreateShaderModules(device)) - return false; - - VkPipelineShaderStageCreateInfo stage[2] = {}; - stage[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - stage[0].module = bd->ShaderModuleVert; - stage[0].pName = "main"; - stage[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - stage[1].module = bd->ShaderModuleFrag; - stage[1].pName = "main"; - - VkVertexInputBindingDescription binding_desc[1] = {}; - binding_desc[0].stride = sizeof(ImDrawVert); - binding_desc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; - - VkVertexInputAttributeDescription attribute_desc[3] = {}; - attribute_desc[0].location = 0; - attribute_desc[0].binding = binding_desc[0].binding; - attribute_desc[0].format = VK_FORMAT_R32G32_SFLOAT; - 
attribute_desc[0].offset = IM_OFFSETOF(ImDrawVert, pos); - attribute_desc[1].location = 1; - attribute_desc[1].binding = binding_desc[0].binding; - attribute_desc[1].format = VK_FORMAT_R32G32_SFLOAT; - attribute_desc[1].offset = IM_OFFSETOF(ImDrawVert, uv); - attribute_desc[2].location = 2; - attribute_desc[2].binding = binding_desc[0].binding; - attribute_desc[2].format = VK_FORMAT_R8G8B8A8_UNORM; - attribute_desc[2].offset = IM_OFFSETOF(ImDrawVert, col); - - VkPipelineVertexInputStateCreateInfo vertex_info = {}; - vertex_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_info.vertexBindingDescriptionCount = 1; - vertex_info.pVertexBindingDescriptions = binding_desc; - vertex_info.vertexAttributeDescriptionCount = 3; - vertex_info.pVertexAttributeDescriptions = attribute_desc; - - VkPipelineInputAssemblyStateCreateInfo ia_info = {}; - ia_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - ia_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - - VkPipelineViewportStateCreateInfo viewport_info = {}; - viewport_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_info.viewportCount = 1; - viewport_info.scissorCount = 1; - - VkPipelineRasterizationStateCreateInfo raster_info = {}; - raster_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - raster_info.polygonMode = VK_POLYGON_MODE_FILL; - raster_info.cullMode = VK_CULL_MODE_NONE; - raster_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; - raster_info.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo ms_info = {}; - ms_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - ms_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - - VkPipelineColorBlendAttachmentState color_attachment[1] = {}; - color_attachment[0].blendEnable = VK_TRUE; - color_attachment[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - color_attachment[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - 
color_attachment[0].colorBlendOp = VK_BLEND_OP_ADD; - color_attachment[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; - color_attachment[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - color_attachment[0].alphaBlendOp = VK_BLEND_OP_ADD; - color_attachment[0].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - - VkPipelineDepthStencilStateCreateInfo depth_info = {}; - depth_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - - VkPipelineColorBlendStateCreateInfo blend_info = {}; - blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - blend_info.attachmentCount = 1; - blend_info.pAttachments = color_attachment; - - VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; - VkPipelineDynamicStateCreateInfo dynamic_state = {}; - dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state.dynamicStateCount = (uint32_t)IM_ARRAYSIZE(dynamic_states); - dynamic_state.pDynamicStates = dynamic_states; - - if (!ImGui_ImplVulkan_CreatePipelineLayout(device)) - return false; - - VkGraphicsPipelineCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - info.flags = bd->PipelineCreateFlags; - info.stageCount = 2; - info.pStages = stage; - info.pVertexInputState = &vertex_info; - info.pInputAssemblyState = &ia_info; - info.pViewportState = &viewport_info; - info.pRasterizationState = &raster_info; - info.pMultisampleState = &ms_info; - info.pDepthStencilState = &depth_info; - info.pColorBlendState = &blend_info; - info.pDynamicState = &dynamic_state; - info.layout = bd->PipelineLayout; - info.renderPass = renderPass; - info.subpass = 0; - VkResult err = vkCreateGraphicsPipelines(device, pipelineCache, 1, &info, nullptr, pipeline); - return (err == VK_SUCCESS); -} - -bool ImGui_ImplVulkan_CreateDeviceObjects() -{ - ImGui_ImplVulkan_Data* bd = 
ImGui_ImplVulkan_GetBackendData(); - - if (!bd->VertexStreamBuffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE) || - !bd->IndexStreamBuffer.Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_BUFFER_SIZE)) - { - return false; - } - - if (!ImGui_ImplVulkan_CreatePipeline(g_vulkan_context->GetDevice(), VK_NULL_HANDLE, bd->RenderPass, &bd->Pipeline)) - return false; - - return true; -} - -void ImGui_ImplVulkan_DestroyDeviceObjects() -{ - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - - bd->VertexStreamBuffer.Destroy(false); - bd->IndexStreamBuffer.Destroy(false); - bd->FontTexture.Destroy(false); - - if (bd->ShaderModuleVert) { vkDestroyShaderModule(g_vulkan_context->GetDevice(), bd->ShaderModuleVert, nullptr); bd->ShaderModuleVert = VK_NULL_HANDLE; } - if (bd->ShaderModuleFrag) { vkDestroyShaderModule(g_vulkan_context->GetDevice(), bd->ShaderModuleFrag, nullptr); bd->ShaderModuleFrag = VK_NULL_HANDLE; } - if (bd->FontSampler) { vkDestroySampler(g_vulkan_context->GetDevice(), bd->FontSampler, nullptr); bd->FontSampler = VK_NULL_HANDLE; } - if (bd->DescriptorSetLayout) { vkDestroyDescriptorSetLayout(g_vulkan_context->GetDevice(), bd->DescriptorSetLayout, nullptr); bd->DescriptorSetLayout = VK_NULL_HANDLE; } - if (bd->PipelineLayout) { vkDestroyPipelineLayout(g_vulkan_context->GetDevice(), bd->PipelineLayout, nullptr); bd->PipelineLayout = VK_NULL_HANDLE; } - if (bd->Pipeline) { vkDestroyPipeline(g_vulkan_context->GetDevice(), bd->Pipeline, nullptr); bd->Pipeline = VK_NULL_HANDLE; } -} - -bool ImGui_ImplVulkan_Init(VkRenderPass render_pass) -{ - ImGuiIO& io = ImGui::GetIO(); - IM_ASSERT(io.BackendRendererUserData == NULL && "Already initialized a renderer backend!"); - - // Setup backend capabilities flags - ImGui_ImplVulkan_Data* bd = IM_NEW(ImGui_ImplVulkan_Data)(); - io.BackendRendererUserData = (void*)bd; - io.BackendRendererName = "imgui_impl_vulkan"; - io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the 
ImDrawCmd::VtxOffset field, allowing for large meshes. - - IM_ASSERT(render_pass != VK_NULL_HANDLE); - - bd->RenderPass = render_pass; - - return ImGui_ImplVulkan_CreateDeviceObjects(); -} - -void ImGui_ImplVulkan_Shutdown() -{ - ImGui_ImplVulkan_Data* bd = ImGui_ImplVulkan_GetBackendData(); - IM_ASSERT(bd != NULL && "No renderer backend to shutdown, or already shutdown?"); - ImGuiIO& io = ImGui::GetIO(); - - ImGui_ImplVulkan_DestroyDeviceObjects(); - io.BackendRendererName = NULL; - io.BackendRendererUserData = NULL; - IM_DELETE(bd); -} diff --git a/src/util/imgui_impl_vulkan.h b/src/util/imgui_impl_vulkan.h deleted file mode 100644 index 5b4ecdb67..000000000 --- a/src/util/imgui_impl_vulkan.h +++ /dev/null @@ -1,12 +0,0 @@ -// dear imgui: Renderer Backend for Vulkan -// This needs to be used along with a Platform Backend (e.g. GLFW, SDL, Win32, custom..) - -#pragma once -#include "imgui.h" // IMGUI_IMPL_API -#include "common/vulkan/loader.h" - -// Called by user code -bool ImGui_ImplVulkan_Init(VkRenderPass render_pass); -void ImGui_ImplVulkan_Shutdown(); -void ImGui_ImplVulkan_RenderDrawData(ImDrawData* draw_data); -bool ImGui_ImplVulkan_CreateFontsTexture(); diff --git a/src/util/imgui_manager.cpp b/src/util/imgui_manager.cpp index 15cf51917..f539ec864 100644 --- a/src/util/imgui_manager.cpp +++ b/src/util/imgui_manager.cpp @@ -8,9 +8,8 @@ #include "common/log.h" #include "common/string_util.h" #include "common/timer.h" -#include "core/gpu.h" +#include "gpu_device.h" #include "core/host.h" -#include "host_display.h" #include "core/system.h" #include "fmt/format.h" #include "imgui.h" @@ -82,7 +81,7 @@ bool ImGuiManager::Initialize() } s_global_scale = - std::max(g_host_display->GetWindowScale() * static_cast(g_settings.display_osd_scale / 100.0f), 1.0f); + std::max(g_gpu_device->GetWindowScale() * static_cast(g_settings.display_osd_scale / 100.0f), 1.0f); ImGui::CreateContext(); @@ -100,24 +99,16 @@ bool ImGuiManager::Initialize() #endif 
io.DisplayFramebufferScale = ImVec2(1, 1); // We already scale things ourselves, this would double-apply scaling - io.DisplaySize.x = static_cast(g_host_display->GetWindowWidth()); - io.DisplaySize.y = static_cast(g_host_display->GetWindowHeight()); + io.DisplaySize.x = static_cast(g_gpu_device->GetWindowWidth()); + io.DisplaySize.y = static_cast(g_gpu_device->GetWindowHeight()); SetKeyMap(); SetStyle(); - if (!g_host_display->CreateImGuiContext()) - { - Panic("Failed to create ImGui device context"); - g_host_display->DestroyImGuiContext(); - ImGui::DestroyContext(); - return false; - } - if (!AddImGuiFonts(false) || !g_host_display->UpdateImGuiFontTexture()) + if (!AddImGuiFonts(false) || !g_gpu_device->UpdateImGuiFontTexture()) { Panic("Failed to create ImGui font text"); - g_host_display->DestroyImGuiContext(); ImGui::DestroyContext(); return false; } @@ -131,8 +122,6 @@ bool ImGuiManager::Initialize() void ImGuiManager::Shutdown() { - if (g_host_display) - g_host_display->DestroyImGuiContext(); if (ImGui::GetCurrentContext()) ImGui::DestroyContext(); @@ -145,8 +134,8 @@ void ImGuiManager::Shutdown() void ImGuiManager::WindowResized() { - const u32 new_width = g_host_display ? g_host_display->GetWindowWidth() : 0; - const u32 new_height = g_host_display ? g_host_display->GetWindowHeight() : 0; + const u32 new_width = g_gpu_device ? g_gpu_device->GetWindowWidth() : 0; + const u32 new_height = g_gpu_device ? g_gpu_device->GetWindowHeight() : 0; ImGui::GetIO().DisplaySize = ImVec2(static_cast(new_width), static_cast(new_height)); @@ -159,7 +148,7 @@ void ImGuiManager::WindowResized() void ImGuiManager::UpdateScale() { - const float window_scale = g_host_display ? g_host_display->GetWindowScale() : 1.0f; + const float window_scale = g_gpu_device ? 
g_gpu_device->GetWindowScale() : 1.0f; const float scale = std::max(window_scale * static_cast(g_settings.display_osd_scale / 100.0f), 1.0f); if (scale == s_global_scale && (!HasFullscreenFonts() || !ImGuiFullscreen::UpdateLayoutScale())) @@ -178,7 +167,7 @@ void ImGuiManager::UpdateScale() if (!AddImGuiFonts(HasFullscreenFonts())) Panic("Failed to create ImGui font text"); - if (!g_host_display->UpdateImGuiFontTexture()) + if (!g_gpu_device->UpdateImGuiFontTexture()) Panic("Failed to recreate font texture after scale+resize"); NewFrame(); @@ -533,7 +522,7 @@ bool ImGuiManager::AddFullscreenFontsIfMissing() AddImGuiFonts(false); } - g_host_display->UpdateImGuiFontTexture(); + g_gpu_device->UpdateImGuiFontTexture(); NewFrame(); return HasFullscreenFonts(); @@ -563,10 +552,10 @@ void Host::AddOSDMessage(std::string message, float duration /*= 2.0f*/) void Host::AddKeyedOSDMessage(std::string key, std::string message, float duration /* = 2.0f */) { - if (!key.empty()) - Log_InfoPrintf("OSD [%s]: %s", key.c_str(), message.c_str()); - else - Log_InfoPrintf("OSD: %s", message.c_str()); + if (!key.empty()) + Log_InfoPrintf("OSD [%s]: %s", key.c_str(), message.c_str()); + else + Log_InfoPrintf("OSD: %s", message.c_str()); OSDMessage msg; msg.key = std::move(key); diff --git a/src/util/input_manager.h b/src/util/input_manager.h index 0f97418bd..642bf4dbe 100644 --- a/src/util/input_manager.h +++ b/src/util/input_manager.h @@ -12,9 +12,9 @@ #include "common/settings_interface.h" #include "common/types.h" -#include "common/window_info.h" #include "core/input_types.h" +#include "window_info.h" /// Class, or source of an input event. 
enum class InputSourceType : u32 diff --git a/src/util/input_source.h b/src/util/input_source.h index 0a5784924..e34533b89 100644 --- a/src/util/input_source.h +++ b/src/util/input_source.h @@ -77,9 +77,6 @@ public: #ifdef WITH_SDL2 static std::unique_ptr CreateSDLSource(); #endif -#ifdef WITH_EVDEV - static std::unique_ptr CreateEvdevSource(); -#endif #ifdef __ANDROID__ static std::unique_ptr CreateAndroidSource(); #endif diff --git a/src/util/metal_device.h b/src/util/metal_device.h new file mode 100644 index 000000000..fd3daff96 --- /dev/null +++ b/src/util/metal_device.h @@ -0,0 +1,385 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +// Macro hell. These have to come first. +#include +#include +#include + +#ifndef __OBJC__ +#error This file needs to be compiled with Objective C++. +#endif + +#if __has_feature(objc_arc) +#error ARC should not be enabled. +#endif + +#include "gpu_device.h" +#include "metal_stream_buffer.h" +#include "postprocessing_chain.h" +#include "window_info.h" + +#include "common/rectangle.h" +#include "common/timer.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +class MetalDevice; +class MetalFramebuffer; +class MetalPipeline; +class MetalTexture; + +class MetalSampler final : public GPUSampler +{ + friend MetalDevice; + +public: + ~MetalSampler() override; + + ALWAYS_INLINE id GetSamplerState() const { return m_ss; } + + void SetDebugName(const std::string_view& name) override; + +private: + MetalSampler(id ss); + + id m_ss; +}; + +class MetalShader final : public GPUShader +{ + friend MetalDevice; + +public: + ~MetalShader() override; + + ALWAYS_INLINE id GetLibrary() const { return m_library; } + ALWAYS_INLINE id GetFunction() const { return m_function; } + + void SetDebugName(const std::string_view& name) override; + +private: + MetalShader(GPUShaderStage stage, id library, id function); + + id m_library; + id 
m_function; +}; + +class MetalPipeline final : public GPUPipeline +{ + friend MetalDevice; + +public: + ~MetalPipeline() override; + + ALWAYS_INLINE id GetPipelineState() const { return m_pipeline; } + ALWAYS_INLINE id GetDepthState() const { return m_depth; } + ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } + ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } + + void SetDebugName(const std::string_view& name) override; + +private: + MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, + MTLPrimitiveType primitive); + + id m_pipeline; + id m_depth; + MTLCullMode m_cull_mode; + MTLPrimitiveType m_primitive; +}; + +class MetalTexture final : public GPUTexture +{ + friend MetalDevice; + +public: + ~MetalTexture(); + + ALWAYS_INLINE id GetMTLTexture() const { return m_texture; } + + bool Create(id device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, + Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0); + void Destroy(); + + bool IsValid() const override; + + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; + bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; + void Unmap() override; + + void MakeReadyForSampling() override; + + void SetDebugName(const std::string_view& name) override; + +private: + MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, + Format format); + + id m_texture; + + u16 m_map_x = 0; + u16 m_map_y = 0; + u16 m_map_width = 0; + u16 m_map_height = 0; + u8 m_map_layer = 0; + u8 m_map_level = 0; +}; + +class MetalTextureBuffer final : public GPUTextureBuffer +{ +public: + MetalTextureBuffer(Format format, u32 size_in_elements); + ~MetalTextureBuffer() override; + + ALWAYS_INLINE id GetMTLBuffer() const { return m_buffer.GetBuffer(); } + + bool CreateBuffer(id device); + + 
// Inherited via GPUTextureBuffer + void* Map(u32 required_elements) override; + void Unmap(u32 used_elements) override; + + void SetDebugName(const std::string_view& name) override; + +private: + MetalStreamBuffer m_buffer; +}; + +class MetalFramebuffer final : public GPUFramebuffer +{ + friend MetalDevice; + +public: + ~MetalFramebuffer() override; + + MTLRenderPassDescriptor* GetDescriptor() const; + + void SetDebugName(const std::string_view& name) override; + +private: + MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, id ds_tex, + MTLRenderPassDescriptor* descriptor); + + id m_rt_tex; + id m_ds_tex; + MTLRenderPassDescriptor* m_descriptor; +}; + +class MetalDevice final : public GPUDevice +{ +public: + ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } + ALWAYS_INLINE static id GetMTLDevice() { return GetInstance().m_device; } + ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; } + ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } + + MetalDevice(); + ~MetalDevice(); + + RenderAPI GetRenderAPI() const override; + + bool HasSurface() const override; + + bool UpdateWindow() override; + void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; + + AdapterAndModeList GetAdapterAndModeList() override; + void DestroySurface() override; + + std::string GetDriverInfo() const override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + + bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 
width, u32 height, void* out_data, + u32 out_data_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; + void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 width, u32 height) override; + void ClearRenderTarget(GPUTexture* t, u32 c) override; + void ClearDepth(GPUTexture* t, float d) override; + void InvalidateRenderTarget(GPUTexture* t) override; + + std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; + + std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, + DynamicHeapArray* out_binary = nullptr) override; + std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) 
override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size) override; + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + + bool GetHostRefreshRate(float* refresh_rate) override; + + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + + void SetVSync(bool enabled) override; + + bool BeginPresent(bool skip_present) override; + void EndPresent() override; + + void WaitForFenceCounter(u64 counter); + + ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; } + id GetBlitEncoder(bool is_inline); + + void SubmitCommandBuffer(bool wait_for_completion = false); + void SubmitCommandBufferAndRestartRenderPass(const char* reason); + + void CommitClear(MetalTexture* tex); + + void UnbindFramebuffer(MetalFramebuffer* fb); + void UnbindFramebuffer(MetalTexture* tex); + void UnbindPipeline(MetalPipeline* pl); + void UnbindTexture(MetalTexture* tex); + void UnbindTextureBuffer(MetalTextureBuffer* buf); + + static void DeferRelease(id obj); + static void DeferRelease(u64 
fence_counter, id obj); + + static AdapterAndModeList StaticGetAdapterAndModeList(); + +protected: + bool CreateDevice(const std::string_view& adapter, bool threaded_presentation) override; + void DestroyDevice() override; + +private: + static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; + static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; + static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32 /*16*/ * 1024 * 1024; // TODO reduce after separate allocations + static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; + + using DepthStateMap = std::unordered_map>; + + ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; } + + void SetFeatures(); + + std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, + const std::string_view& entry_point); + + id GetDepthState(const GPUPipeline::DepthState& ds); + + void CreateCommandBuffer(); + void CommandBufferCompletedOffThread(u64 fence_counter); + void WaitForPreviousCommandBuffers(); + void CleanupObjects(); + + ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); } + ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); } + void BeginRenderPass(); + void EndRenderPass(); + void EndInlineUploading(); + void EndAnyEncoding(); + + Common::Rectangle ClampToFramebufferSize(const Common::Rectangle& rc) const; + void PreDrawCheck(); + void SetInitialEncoderState(); + void SetUniformBufferInRenderEncoder(); + void SetViewportInRenderEncoder(); + void SetScissorInRenderEncoder(); + + bool CheckDownloadBufferSize(u32 required_size); + + bool CreateLayer(); + void DestroyLayer(); + void RenderBlankFrame(); + + bool CreateBuffers(); + void DestroyBuffers(); + + bool CreateTimestampQueries(); + void DestroyTimestampQueries(); + void PopTimestampQuery(); + void KickTimestampQuery(); 
+ + id m_device; + id m_queue; + + CAMetalLayer* m_layer = nil; + id m_layer_drawable = nil; + MTLRenderPassDescriptor* m_layer_pass_desc = nil; + + std::mutex m_fence_mutex; + u64 m_current_fence_counter = 0; + std::atomic m_completed_fence_counter{0}; + std::deque> m_cleanup_objects; // [fence_counter, object] + + DepthStateMap m_depth_states; + + id m_download_buffer = nil; + u32 m_download_buffer_size = 0; + + MetalStreamBuffer m_vertex_buffer; + MetalStreamBuffer m_index_buffer; + MetalStreamBuffer m_uniform_buffer; + MetalStreamBuffer m_texture_upload_buffer; + + id m_upload_cmdbuf = nil; + id m_upload_encoder = nil; + id m_inline_upload_encoder = nil; + + id m_render_cmdbuf = nil; + id m_render_encoder = nil; + + MetalFramebuffer* m_current_framebuffer = nullptr; + + MetalPipeline* m_current_pipeline = nullptr; + id m_current_depth_state = nil; + MTLCullMode m_current_cull_mode = MTLCullModeNone; + u32 m_current_uniform_buffer_position = 0; + + std::array, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; + std::array, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; + id m_current_ssbo = nil; + Common::Rectangle m_current_viewport = {}; + Common::Rectangle m_current_scissor = {}; + + bool m_vsync_enabled = false; + + // std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; + // u8 m_read_timestamp_query = 0; + // u8 m_write_timestamp_query = 0; + // u8 m_waiting_timestamp_queries = 0; + // bool m_timestamp_query_started = false; + // float m_accumulated_gpu_time = 0.0f; +}; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm new file mode 100644 index 000000000..f6874299c --- /dev/null +++ b/src/util/metal_device.mm @@ -0,0 +1,2134 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "metal_device.h" +#include "core/host_settings.h" +#include "spirv_compiler.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/file_system.h" +#include 
"common/log.h" +#include "common/path.h" +#include "common/string_util.h" + +// TODO FIXME... +#define FMT_EXCEPTIONS 0 +#include "fmt/format.h" + +#include +#include + +Log_SetChannel(MetalDevice); + +// TODO: Disable hazard tracking and issue barriers explicitly. + +static constexpr MTLPixelFormat LAYER_MTL_PIXEL_FORMAT = MTLPixelFormatRGBA8Unorm; +static constexpr GPUTexture::Format LAYER_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; + +// Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems +// to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. +static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; + +// The pitch alignment must be less or equal to the upload alignment. +// We need 32 here for AVX2, so 64 is also fine. +static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; + +static constexpr std::array(GPUTexture::Format::MaxCount)> s_pixel_format_mapping = { + MTLPixelFormatInvalid, // Unknown + MTLPixelFormatRGBA8Unorm, // RGBA8 + MTLPixelFormatBGRA8Unorm, // BGRA8 + MTLPixelFormatB5G6R5Unorm, // RGB565 + MTLPixelFormatA1BGR5Unorm, // RGBA5551 + MTLPixelFormatR8Unorm, // R8 + MTLPixelFormatDepth16Unorm, // D16 + MTLPixelFormatR16Unorm, // R16 + MTLPixelFormatR16Float, // R16F + MTLPixelFormatR32Sint, // R32I + MTLPixelFormatR32Uint, // R32U + MTLPixelFormatR32Float, // R32F + MTLPixelFormatRG8Unorm, // RG8 + MTLPixelFormatRG16Unorm, // RG16 + MTLPixelFormatRG16Float, // RG16F + MTLPixelFormatRG32Float, // RG32F + MTLPixelFormatRGBA16Unorm, // RGBA16 + MTLPixelFormatRGBA16Float, // RGBA16F + MTLPixelFormatRGBA32Float, // RGBA32F + MTLPixelFormatBGR10A2Unorm, // RGB10A2 +}; + +static constexpr std::array s_clear_color = {}; + +static unsigned s_next_bad_shader_id = 1; + +static NSString* StringViewToNSString(const std::string_view& str) +{ + if (str.empty()) + return nil; + + return [[[NSString alloc] autorelease] initWithBytes:str.data() + length:static_cast(str.length()) + encoding:NSUTF8StringEncoding]; +} + 
+static void LogNSError(NSError* error, const char* desc, ...) +{ + std::va_list ap; + va_start(ap, desc); + Log::Writev("MetalDevice", "", LOGLEVEL_ERROR, desc, ap); + va_end(ap); + + Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Code: %u", static_cast(error.code)); + Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Description: %s", [error.description UTF8String]); +} + +template +static void RunOnMainThread(F&& f) +{ + if ([NSThread isMainThread]) + f(); + else + dispatch_sync(dispatch_get_main_queue(), f); +} + +MetalDevice::MetalDevice() : m_current_viewport(0, 0, 1, 1), m_current_scissor(0, 0, 1, 1) +{ +} + +MetalDevice::~MetalDevice() +{ + Assert(m_layer == nil); + Assert(m_device == nil); +} + +RenderAPI MetalDevice::GetRenderAPI() const +{ + return RenderAPI::Metal; +} + +bool MetalDevice::HasSurface() const +{ + return (m_layer != nil); +} + +bool MetalDevice::GetHostRefreshRate(float* refresh_rate) +{ + return GPUDevice::GetHostRefreshRate(refresh_rate); +} + +void MetalDevice::SetVSync(bool enabled) +{ + m_vsync_enabled = enabled; + + if (m_layer != nil) + [m_layer setDisplaySyncEnabled:enabled]; +} + +bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation) +{ + @autoreleasepool + { + id device = nil; + if (!adapter.empty()) + { + NSArray> *devices = [MTLCopyAllDevices() autorelease]; + const u32 count = static_cast([devices count]); + for (u32 i = 0; i < count; i++) + { + if (adapter == [[devices[i] name] UTF8String]) + { + device = devices[i]; + break; + } + } + + if (device == nil) + Log_ErrorPrint(fmt::format("Failed to find device named '{}'. 
Trying default.", adapter).c_str()); + } + + if (device == nil) + { + device = [MTLCreateSystemDefaultDevice() autorelease]; + if (device == nil) + { + Log_ErrorPrint("Failed to create default Metal device."); + return false; + } + } + + id queue = [[device newCommandQueue] autorelease]; + if (queue == nil) + { + Log_ErrorPrint("Failed to create command queue."); + return false; + } + + m_device = [device retain]; + m_queue = [queue retain]; + Log_InfoPrintf("Metal Device: %s", [[m_device name] UTF8String]); + + SetFeatures(); + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) + return false; + + CreateCommandBuffer(); + RenderBlankFrame(); + + if (!CreateBuffers()) + { + Log_ErrorPrintf("Failed to create buffers."); + return false; + } + + return true; + } +} + +void MetalDevice::SetFeatures() +{ + // https://gist.github.com/kylehowells/63d0723abc9588eb734cade4b7df660d + if ([m_device supportsFamily:MTLGPUFamilyMacCatalyst1] || [m_device supportsFamily:MTLGPUFamilyMac1] || + [m_device supportsFamily:MTLGPUFamilyApple3]) + { + m_max_texture_size = 16384; + } + else + { + m_max_texture_size = 8192; + } + + m_max_multisamples = 0; + for (u32 multisamples = 1; multisamples < 16; multisamples++) + { + if (![m_device supportsTextureSampleCount:multisamples]) + break; + m_max_multisamples = multisamples; + } + + m_features.dual_source_blend = true; + m_features.per_sample_shading = true; + m_features.noperspective_interpolation = true; + m_features.supports_texture_buffers = true; + m_features.texture_buffers_emulated_with_ssbo = true; + m_features.partial_msaa_resolve = true; + m_features.shader_cache = true; + m_features.pipeline_cache = false; +} + +void MetalDevice::DestroyDevice() +{ + WaitForPreviousCommandBuffers(); + + if (InRenderPass()) + EndRenderPass(); + + if (m_upload_cmdbuf != nil) + { + [m_upload_encoder endEncoding]; + [m_upload_encoder release]; + m_upload_encoder = nil; + [m_upload_cmdbuf release]; + m_upload_cmdbuf = nil; + 
} + if (m_render_cmdbuf != nil) + { + [m_render_cmdbuf release]; + m_render_cmdbuf = nil; + } + + DestroyBuffers(); + + for (auto& it : m_cleanup_objects) + [it.second release]; + m_cleanup_objects.clear(); + + if (m_queue != nil) + { + [m_queue release]; + m_queue = nil; + } + if (m_device != nil) + { + [m_device release]; + m_device = nil; + } +} + +bool MetalDevice::CreateLayer() +{ + @autoreleasepool + { + RunOnMainThread([this]() { + @autoreleasepool + { + Log_InfoPrintf("Creating a %ux%u Metal layer.", m_window_info.surface_width, m_window_info.surface_height); + const auto size = + CGSizeMake(static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height)); + m_layer = [CAMetalLayer layer]; + [m_layer setDevice:m_device]; + [m_layer setDrawableSize:size]; + [m_layer setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + NSView* view = GetWindowView(); + [view setWantsLayer:TRUE]; + [view setLayer:m_layer]; + } + }); + + [m_layer setDisplaySyncEnabled:m_vsync_enabled]; + m_window_info.surface_format = GPUTexture::Format::RGBA8; + + DebugAssert(m_layer_pass_desc == nil); + m_layer_pass_desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; + m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; + m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; + m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; + m_layer_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore; + m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0); + return true; + } +} + +void MetalDevice::DestroyLayer() +{ + if (m_layer == nil) + return; + + // Should wait for previous command buffers to finish, which might be rendering to drawables. 
+ WaitForPreviousCommandBuffers(); + + [m_layer_pass_desc release]; + m_layer_pass_desc = nil; + m_window_info.surface_format = GPUTexture::Format::Unknown; + + RunOnMainThread([this]() { + NSView* view = GetWindowView(); + [view setLayer:nil]; + [view setWantsLayer:FALSE]; + [m_layer release]; + m_layer = nullptr; + }); +} + +void MetalDevice::RenderBlankFrame() +{ + DebugAssert(!InRenderPass()); + if (m_layer == nil) + return; + + @autoreleasepool + { + id drawable = [m_layer nextDrawable]; + m_layer_pass_desc.colorAttachments[0].texture = [drawable texture]; + id encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc]; + [encoder endEncoding]; + [m_render_cmdbuf presentDrawable:drawable]; + SubmitCommandBuffer(); + } +} + +bool MetalDevice::UpdateWindow() +{ + if (InRenderPass()) + EndRenderPass(); + DestroyLayer(); + + if (!AcquireWindow(false)) + return false; + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) + { + Log_ErrorPrintf("Failed to create layer on updated window"); + return false; + } + + return true; +} + +void MetalDevice::DestroySurface() +{ + DestroyLayer(); +} + +void MetalDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) +{ + @autoreleasepool + { + m_window_info.surface_scale = new_window_scale; + if (static_cast(new_window_width) == m_window_info.surface_width && + static_cast(new_window_height) == m_window_info.surface_height) + { + return; + } + + m_window_info.surface_width = new_window_width; + m_window_info.surface_height = new_window_height; + + [m_layer setDrawableSize:CGSizeMake(new_window_width, new_window_height)]; + m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; + m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; + } +} + +std::string MetalDevice::GetDriverInfo() const +{ + @autoreleasepool + { + return ([[m_device description] UTF8String]); + } +} + +bool MetalDevice::CreateBuffers() +{ + if 
(!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) || + !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE) || + !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE)) + { + Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); + return false; + } + + return true; +} + +void MetalDevice::DestroyBuffers() +{ + if (m_download_buffer != nil) + { + [m_download_buffer release]; + m_download_buffer = nil; + m_download_buffer_size = 0; + } + + m_texture_upload_buffer.Destroy(); + m_uniform_buffer.Destroy(); + m_vertex_buffer.Destroy(); + m_index_buffer.Destroy(); + + for (auto& it : m_depth_states) + { + if (it.second != nil) + [it.second release]; + } + m_depth_states.clear(); +} + +GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList() +{ + AdapterAndModeList ret; + @autoreleasepool { + NSArray> *devices = [MTLCopyAllDevices() autorelease]; + const u32 count = static_cast([devices count]); + ret.adapter_names.reserve(count); + for (u32 i = 0; i < count; i++) + ret.adapter_names.emplace_back([devices[i].name UTF8String]); + } + + return ret; +} + +GPUDevice::AdapterAndModeList MetalDevice::GetAdapterAndModeList() +{ + return StaticGetAdapterAndModeList(); +} + +#if 0 +bool MetalDevice::CreateTimestampQueries() +{ + for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++) + { + for (u32 j = 0; j < 3; j++) + { + const CMetal_QUERY_DESC qdesc((j == 0) ? 
Metal_QUERY_TIMESTAMP_DISJOINT : Metal_QUERY_TIMESTAMP); + const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + m_timestamp_queries = {}; + return false; + } + } + } + + KickTimestampQuery(); + return true; +} + +void MetalDevice::DestroyTimestampQueries() +{ + if (!m_timestamp_queries[0][0]) + return; + + if (m_timestamp_query_started) + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + + m_timestamp_queries = {}; + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; +} + +void MetalDevice::PopTimestampQuery() +{ + while (m_waiting_timestamp_queries > 0) + { + Metal_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; + const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint, + sizeof(disjoint), Metal_ASYNC_GETDATA_DONOTFLUSH); + if (disjoint_hr != S_OK) + break; + + if (disjoint.Disjoint) + { + Log_VerbosePrintf("GPU timing disjoint, resetting."); + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; + } + else + { + u64 start = 0, end = 0; + const HRESULT start_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start, + sizeof(start), Metal_ASYNC_GETDATA_DONOTFLUSH); + const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end), + Metal_ASYNC_GETDATA_DONOTFLUSH); + if (start_hr == S_OK && end_hr == S_OK) + { + const float delta = + static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); + m_accumulated_gpu_time += delta; + m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_waiting_timestamp_queries--; + } + } + } + + if (m_timestamp_query_started) + { + m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get()); + 
m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_timestamp_query_started = false; + m_waiting_timestamp_queries++; + } +} + +void MetalDevice::KickTimestampQuery() +{ + if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) + return; + + m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + m_timestamp_query_started = true; +} +#endif + +bool MetalDevice::SetGPUTimingEnabled(bool enabled) +{ +#if 0 + if (m_gpu_timing_enabled == enabled) + return true; + + m_gpu_timing_enabled = enabled; + if (m_gpu_timing_enabled) + { + if (!CreateTimestampQueries()) + return false; + + KickTimestampQuery(); + return true; + } + else + { + DestroyTimestampQueries(); + return true; + } +#else + return false; +#endif +} + +float MetalDevice::GetAndResetAccumulatedGPUTime() +{ +#if 0 + const float value = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return value; +#else + return 0.0f; +#endif +} + +MetalShader::MetalShader(GPUShaderStage stage, id library, id function) + : GPUShader(stage), m_library(library), m_function(function) +{ +} + +MetalShader::~MetalShader() +{ + MetalDevice::DeferRelease(m_function); + MetalDevice::DeferRelease(m_library); +} + +void MetalShader::SetDebugName(const std::string_view& name) +{ + @autoreleasepool + { + [m_function setLabel:StringViewToNSString(name)]; + } +} + +// TODO: Clean this up, somehow.. 
+namespace EmuFolders { +extern std::string DataRoot; +} +static void DumpShader(u32 n, const std::string_view& suffix, const std::string_view& data) +{ + if (data.empty()) + return; + + auto fp = FileSystem::OpenManagedCFile( + Path::Combine(EmuFolders::DataRoot, fmt::format("shader{}_{}.txt", suffix, n)).c_str(), "wb"); + if (!fp) + return; + + std::fwrite(data.data(), data.length(), 1, fp.get()); +} + +std::unique_ptr MetalDevice::CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, + const std::string_view& entry_point) +{ + @autoreleasepool + { + NSString* const ns_source = StringViewToNSString(source); + NSError* error = nullptr; + id library = [m_device newLibraryWithSource:ns_source options:nil error:&error]; + if (!library) + { + LogNSError(error, "Failed to compile %s shader", GPUShader::GetStageName(stage)); + + auto fp = FileSystem::OpenManagedCFile( + Path::Combine(EmuFolders::DataRoot, fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); + if (fp) + { + std::fwrite(source.data(), source.size(), 1, fp.get()); + std::fprintf(fp.get(), "\n\nCompile %s failed: %u\n", GPUShader::GetStageName(stage), + static_cast(error.code)); + + const char* utf_error = [error.description UTF8String]; + std::fwrite(utf_error, std::strlen(utf_error), 1, fp.get()); + } + + return {}; + } + + id function = [library newFunctionWithName:StringViewToNSString(entry_point)]; + if (!function) + { + Log_ErrorPrintf("Failed to get main function in compiled library"); + return {}; + } + + return std::unique_ptr(new MetalShader(stage, [library retain], [function retain])); + } +} + +std::unique_ptr MetalDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) +{ + const std::string_view str_data(reinterpret_cast(data.data()), data.size()); + return CreateShaderFromMSL(stage, str_data, "main0"); +} + +std::unique_ptr MetalDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, 
+ DynamicHeapArray* out_binary /* = nullptr */) +{ + const u32 options = (m_debug_device ? SPIRVCompiler::DebugInfo : 0) | SPIRVCompiler::VulkanRules; + static constexpr bool dump_shaders = false; + + if (std::strcmp(entry_point, "main") != 0) + { + Log_ErrorPrintf("Entry point must be 'main', but got '%s' instead.", entry_point); + return {}; + } + + std::optional spirv = SPIRVCompiler::CompileShader(stage, source, options); + if (!spirv.has_value()) + { + Log_ErrorPrintf("Failed to compile shader to SPIR-V."); + return {}; + } + + std::optional msl = SPIRVCompiler::CompileSPIRVToMSL(spirv.value()); + if (!msl.has_value()) + { + Log_ErrorPrintf("Failed to compile SPIR-V to MSL."); + return {}; + } + if constexpr (dump_shaders) + { + DumpShader(s_next_bad_shader_id, "_input", source); + DumpShader(s_next_bad_shader_id, "_msl", msl.value()); + s_next_bad_shader_id++; + } + + if (out_binary) + { + out_binary->resize(msl->size()); + std::memcpy(out_binary->data(), msl->data(), msl->size()); + } + + return CreateShaderFromMSL(stage, msl.value(), "main0"); +} + +MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, + MTLPrimitiveType primitive) + : m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive) +{ +} + +MetalPipeline::~MetalPipeline() +{ + MetalDevice::DeferRelease(m_pipeline); +} + +void MetalPipeline::SetDebugName(const std::string_view& name) +{ + // readonly property :/ +} + +id MetalDevice::GetDepthState(const GPUPipeline::DepthState& ds) +{ + const auto it = m_depth_states.find(ds.key); + if (it != m_depth_states.end()) + return it->second; + + @autoreleasepool + { + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = { + { + MTLCompareFunctionNever, // Never + MTLCompareFunctionAlways, // Always + MTLCompareFunctionLess, // Less + MTLCompareFunctionLessEqual, // LessEqual + MTLCompareFunctionGreater, // Greater + MTLCompareFunctionGreaterEqual, // GreaterEqual + 
MTLCompareFunctionEqual, // Equal + }}; + + MTLDepthStencilDescriptor* desc = [[[MTLDepthStencilDescriptor alloc] init] autorelease]; + desc.depthCompareFunction = func_mapping[static_cast(ds.depth_test.GetValue())]; + desc.depthWriteEnabled = ds.depth_write ? TRUE : FALSE; + + id state = [m_device newDepthStencilStateWithDescriptor:desc]; + m_depth_states.emplace(ds.key, state); + if (state == nil) + Log_ErrorPrintf("Failed to create depth-stencil state."); + + return state; + } +} + +std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config) +{ + @autoreleasepool + { + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> + primitive_classes = {{ + MTLPrimitiveTopologyClassPoint, // Points + MTLPrimitiveTopologyClassLine, // Lines + MTLPrimitiveTopologyClassTriangle, // Triangles + MTLPrimitiveTopologyClassTriangle, // TriangleStrips + }}; + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = {{ + MTLPrimitiveTypePoint, // Points + MTLPrimitiveTypeLine, // Lines + MTLPrimitiveTypeTriangle, // Triangles + MTLPrimitiveTypeTriangleStrip, // TriangleStrips + }}; + + static constexpr u32 MAX_COMPONENTS = 4; + static constexpr const MTLVertexFormat + format_mapping[static_cast(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { + {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float + {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 + {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 + {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, + MTLVertexFormatUChar4Normalized}, // UNorm8 + {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16 + {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16 + 
{MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, + MTLVertexFormatUShort4Normalized}, // UNorm16 + {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 + {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 + }; + + static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ + MTLCullModeNone, // None + MTLCullModeFront, // Front + MTLCullModeBack, // Back + }}; + + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + MTLBlendFactorZero, // Zero + MTLBlendFactorOne, // One + MTLBlendFactorSourceColor, // SrcColor + MTLBlendFactorOneMinusSourceColor, // InvSrcColor + MTLBlendFactorDestinationColor, // DstColor + MTLBlendFactorOneMinusDestinationColor, // InvDstColor + MTLBlendFactorSourceAlpha, // SrcAlpha + MTLBlendFactorOneMinusSourceAlpha, // InvSrcAlpha + MTLBlendFactorSource1Alpha, // SrcAlpha1 + MTLBlendFactorOneMinusSource1Alpha, // InvSrcAlpha1 + MTLBlendFactorDestinationAlpha, // DstAlpha + MTLBlendFactorOneMinusDestinationAlpha, // InvDstAlpha + MTLBlendFactorBlendColor, // ConstantAlpha + MTLBlendFactorOneMinusBlendColor, // InvConstantAlpha + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + MTLBlendOperationAdd, // Add + MTLBlendOperationSubtract, // Subtract + MTLBlendOperationReverseSubtract, // ReverseSubtract + MTLBlendOperationMin, // Min + MTLBlendOperationMax, // Max + }}; + + MTLRenderPipelineDescriptor* desc = [[[MTLRenderPipelineDescriptor alloc] init] autorelease]; + desc.vertexFunction = static_cast(config.vertex_shader)->GetFunction(); + desc.fragmentFunction = static_cast(config.fragment_shader)->GetFunction(); + + desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast(config.color_format)]; + desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast(config.depth_format)]; + + // Input 
assembly. + MTLVertexDescriptor* vdesc = nil; + if (!config.input_layout.vertex_attributes.empty()) + { + vdesc = [MTLVertexDescriptor vertexDescriptor]; + for (u32 i = 0; i < static_cast(config.input_layout.vertex_attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; + DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); + + MTLVertexAttributeDescriptor* vd = vdesc.attributes[i]; + vd.format = format_mapping[static_cast(va.type.GetValue())][va.components - 1]; + vd.offset = static_cast(va.offset.GetValue()); + vd.bufferIndex = 1; + } + + vdesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex; + vdesc.layouts[1].stepRate = 1; + vdesc.layouts[1].stride = config.input_layout.vertex_stride; + + desc.vertexDescriptor = vdesc; + } + + // Rasterization state. + const MTLCullMode cull_mode = cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())]; + desc.rasterizationEnabled = TRUE; + desc.inputPrimitiveTopology = primitive_classes[static_cast(config.primitive)]; + + // Depth state + id depth = GetDepthState(config.depth); + if (depth == nil) + return {}; + + // Blending state + MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0]; + ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) | + (config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) | + (config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) | + (config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone); + + // General + const MTLPrimitiveType primitive = primitives[static_cast(config.primitive)]; + desc.rasterSampleCount = config.per_sample_shading ? 
config.samples : 1; + + // Metal-specific stuff + desc.vertexBuffers[1].mutability = MTLMutabilityImmutable; + desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable; + + ca.blendingEnabled = config.blend.enable; + if (config.blend.enable) + { + ca.sourceRGBBlendFactor = blend_mapping[static_cast(config.blend.src_blend.GetValue())]; + ca.destinationRGBBlendFactor = blend_mapping[static_cast(config.blend.dst_blend.GetValue())]; + ca.rgbBlendOperation = op_mapping[static_cast(config.blend.blend_op.GetValue())]; + ca.sourceAlphaBlendFactor = blend_mapping[static_cast(config.blend.src_alpha_blend.GetValue())]; + ca.destinationAlphaBlendFactor = blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())]; + ca.alphaBlendOperation = op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())]; + } + + NSError* error = nullptr; + id pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error]; + if (pipeline == nil) + { + LogNSError(error, "Failed to create render pipeline state"); + return {}; + } + + return std::unique_ptr(new MetalPipeline(pipeline, depth, cull_mode, primitive)); + } +} + +MetalTexture::MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, + Format format) + : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture) +{ +} + +MetalTexture::~MetalTexture() +{ + MetalDevice::GetInstance().UnbindTexture(this); + Destroy(); +} + +bool MetalTexture::IsValid() const +{ + return (m_texture != nil); +} + +bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, + u32 level /*= 0*/) +{ + const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = height * aligned_pitch; + + MetalDevice& dev = MetalDevice::GetInstance(); + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + id actual_buffer; + u32 actual_offset; + u32 actual_pitch; + if 
(req_size >= (sb.GetCurrentSize() / 2u)) + { + const u32 upload_size = height * pitch; + const MTLResourceOptions options = MTLResourceStorageModeShared; + actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options]; + actual_offset = 0; + actual_pitch = pitch; + if (actual_buffer == nil) + { + Panic("Failed to allocate temporary buffer."); + return false; + } + + dev.DeferRelease(actual_buffer); + } + else + { + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + dev.SubmitCommandBuffer(); + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + Panic("Failed to reserve texture upload space."); + return false; + } + } + + actual_offset = sb.GetCurrentOffset(); + StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); + sb.CommitMemory(req_size); + actual_buffer = sb.GetBuffer(); + actual_pitch = aligned_pitch; + } + + if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) + dev.CommitClear(this); + + // TODO: track this + const bool is_inline = true; + id encoder = dev.GetBlitEncoder(is_inline); + [encoder copyFromBuffer:actual_buffer + sourceOffset:actual_offset + sourceBytesPerRow:actual_pitch + sourceBytesPerImage:0 + sourceSize:MTLSizeMake(width, height, 1) + toTexture:m_texture + destinationSlice:layer + destinationLevel:level + destinationOrigin:MTLOriginMake(x, y, 0)]; + m_state = GPUTexture::State::Dirty; + return true; +} + +bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, + u32 level /*= 0*/) +{ + if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) + return false; + + const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = height * aligned_pitch; + + MetalDevice& dev = MetalDevice::GetInstance(); + 
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) + dev.CommitClear(this); + + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + dev.SubmitCommandBuffer(); + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + Panic("Failed to allocate space in texture upload buffer"); + return false; + } + } + + *map = sb.GetCurrentHostPointer(); + *map_stride = aligned_pitch; + m_map_x = x; + m_map_y = y; + m_map_width = width; + m_map_height = height; + m_map_layer = layer; + m_map_level = level; + m_state = GPUTexture::State::Dirty; + return true; +} + +void MetalTexture::Unmap() +{ + const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = m_map_height * aligned_pitch; + + MetalDevice& dev = MetalDevice::GetInstance(); + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + const u32 offset = sb.GetCurrentOffset(); + sb.CommitMemory(req_size); + + // TODO: track this + const bool is_inline = true; + id encoder = dev.GetBlitEncoder(is_inline); + [encoder copyFromBuffer:sb.GetBuffer() + sourceOffset:offset + sourceBytesPerRow:aligned_pitch + sourceBytesPerImage:0 + sourceSize:MTLSizeMake(m_map_width, m_map_height, 1) + toTexture:m_texture + destinationSlice:m_map_layer + destinationLevel:m_map_level + destinationOrigin:MTLOriginMake(m_map_x, m_map_y, 0)]; + + m_map_x = 0; + m_map_y = 0; + m_map_width = 0; + m_map_height = 0; + m_map_layer = 0; + m_map_level = 0; +} + +void MetalTexture::MakeReadyForSampling() +{ + MetalDevice::GetInstance().UnbindFramebuffer(this); +} + +void MetalTexture::SetDebugName(const std::string_view& name) +{ + @autoreleasepool + { + [m_texture setLabel:StringViewToNSString(name)]; + } +} + +void MetalTexture::Destroy() +{ + if (m_texture != nil) + { + MetalDevice::DeferRelease(m_texture); + m_texture = nil; + } + ClearBaseProperties(); +} 
+ +std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data, u32 data_stride, bool dynamic /* = false */) +{ + if (!GPUTexture::ValidateConfig(width, height, layers, layers, samples, type, format)) + return {}; + + const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast(format)]; + if (pixel_format == MTLPixelFormatInvalid) + return {}; + + @autoreleasepool + { + MTLTextureDescriptor* desc = [[[MTLTextureDescriptor alloc] init] autorelease]; + desc.width = width; + desc.height = height; + desc.depth = levels; + desc.pixelFormat = pixel_format; + desc.mipmapLevelCount = levels; + + switch (type) + { + case GPUTexture::Type::Texture: + desc.usage = MTLTextureUsageShaderRead; + break; + + case GPUTexture::Type::RenderTarget: + case GPUTexture::Type::DepthStencil: + desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget; + break; + + case GPUTexture::Type::RWTexture: + desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite; + break; + + default: + UnreachableCode(); + break; + } + + id tex = [m_device newTextureWithDescriptor:desc]; + if (tex == nil) + { + Log_ErrorPrintf("Failed to create %ux%u texture.", width, height); + return {}; + } + + // This one can *definitely* go on the upload buffer. + std::unique_ptr gtex( + new MetalTexture([tex retain], width, height, layers, levels, samples, type, format)); + if (data) + { + // TODO: handle multi-level uploads... + gtex->Update(0, 0, width, height, data, data_stride, 0, 0); + } + + return gtex; + } +} + +MetalFramebuffer::MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, + id ds_tex, MTLRenderPassDescriptor* descriptor) + : GPUFramebuffer(rt, ds, width, height), m_rt_tex(rt_tex), m_ds_tex(ds_tex), m_descriptor(descriptor) +{ +} + +MetalFramebuffer::~MetalFramebuffer() +{ + // TODO: safe deleting? 
+ if (m_rt_tex != nil) + [m_rt_tex release]; + if (m_ds_tex != nil) + [m_ds_tex release]; + [m_descriptor release]; +} + +void MetalFramebuffer::SetDebugName(const std::string_view& name) +{ +} + +MTLRenderPassDescriptor* MetalFramebuffer::GetDescriptor() const +{ + if (m_rt) + { + switch (m_rt->GetState()) + { + case GPUTexture::State::Cleared: + { + const auto clear_color = m_rt->GetUNormClearColor(); + m_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear; + m_descriptor.colorAttachments[0].clearColor = + MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]); + m_rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + m_descriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare; + m_rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + m_descriptor.colorAttachments[0].loadAction = MTLLoadActionLoad; + } + break; + + default: + UnreachableCode(); + break; + } + } + + if (m_ds) + { + switch (m_ds->GetState()) + { + case GPUTexture::State::Cleared: + { + m_descriptor.depthAttachment.loadAction = MTLLoadActionClear; + m_descriptor.depthAttachment.clearDepth = m_ds->GetClearDepth(); + m_ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + m_descriptor.depthAttachment.loadAction = MTLLoadActionDontCare; + m_ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + m_descriptor.depthAttachment.loadAction = MTLLoadActionLoad; + } + break; + + default: + UnreachableCode(); + break; + } + } + + return m_descriptor; +} + +std::unique_ptr MetalDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds) +{ + DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); + MetalTexture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? 
nullptr : rt_or_ds); + MetalTexture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); + + @autoreleasepool + { + MTLRenderPassDescriptor* desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; + id rt_tex = RT ? [RT->GetMTLTexture() retain] : nil; + id ds_tex = DS ? [DS->GetMTLTexture() retain] : nil; + + if (RT) + { + desc.colorAttachments[0].texture = rt_tex; + desc.colorAttachments[0].loadAction = MTLLoadActionLoad; + desc.colorAttachments[0].storeAction = MTLStoreActionStore; + } + + if (DS) + { + desc.depthAttachment.texture = ds_tex; + desc.depthAttachment.loadAction = MTLLoadActionLoad; + desc.depthAttachment.storeAction = MTLStoreActionStore; + } + + const u32 width = RT ? RT->GetWidth() : DS->GetWidth(); + const u32 height = RT ? RT->GetHeight() : DS->GetHeight(); + desc.renderTargetWidth = width; + desc.renderTargetHeight = height; + + return std::unique_ptr(new MetalFramebuffer(RT, DS, width, height, rt_tex, ds_tex, desc)); + } +} + +MetalSampler::MetalSampler(id ss) : m_ss(ss) +{ +} + +MetalSampler::~MetalSampler() = default; + +void MetalSampler::SetDebugName(const std::string_view& name) +{ + // lame.. 
have to put it on the descriptor :/ +} + +std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& config) +{ + @autoreleasepool + { + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + MTLSamplerAddressModeRepeat, // Repeat + MTLSamplerAddressModeClampToEdge, // ClampToEdge + MTLSamplerAddressModeClampToBorderColor, // ClampToBorder + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> min_mag_filters = + {{ + MTLSamplerMinMagFilterNearest, // Nearest + MTLSamplerMinMagFilterLinear, // Linear + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> mip_filters = {{ + MTLSamplerMipFilterNearest, // Nearest + MTLSamplerMipFilterLinear, // Linear + }}; + + struct BorderColorMapping + { + u32 color; + MTLSamplerBorderColor mtl_color; + }; + static constexpr BorderColorMapping border_color_mapping[] = { + {0x00000000u, MTLSamplerBorderColorTransparentBlack}, + {0xFF000000u, MTLSamplerBorderColorOpaqueBlack}, + {0xFFFFFFFFu, MTLSamplerBorderColorOpaqueWhite}, + }; + + MTLSamplerDescriptor* desc = [[[MTLSamplerDescriptor alloc] init] autorelease]; + desc.normalizedCoordinates = true; + desc.sAddressMode = ta[static_cast(config.address_u.GetValue())]; + desc.tAddressMode = ta[static_cast(config.address_v.GetValue())]; + desc.rAddressMode = ta[static_cast(config.address_w.GetValue())]; + desc.minFilter = min_mag_filters[static_cast(config.min_filter.GetValue())]; + desc.magFilter = min_mag_filters[static_cast(config.mag_filter.GetValue())]; + desc.mipFilter = (config.min_lod != config.max_lod) ? 
mip_filters[static_cast(config.mip_filter.GetValue())] : + MTLSamplerMipFilterNotMipmapped; + desc.lodMinClamp = static_cast(config.min_lod); + desc.lodMaxClamp = static_cast(config.max_lod); + desc.maxAnisotropy = config.anisotropy; + + if (config.address_u == GPUSampler::AddressMode::ClampToBorder || + config.address_v == GPUSampler::AddressMode::ClampToBorder || + config.address_w == GPUSampler::AddressMode::ClampToBorder) + { + u32 i; + for (i = 0; i < static_cast(std::size(border_color_mapping)); i++) + { + if (border_color_mapping[i].color == config.border_color) + break; + } + if (i == std::size(border_color_mapping)) + { + Log_ErrorPrintf("Unsupported border color: %08X", config.border_color.GetValue()); + return {}; + } + + desc.borderColor = border_color_mapping[i].mtl_color; + } + + // TODO: Pool? + id ss = [m_device newSamplerStateWithDescriptor:desc]; + if (ss == nil) + { + Log_ErrorPrintf("Failed to create sampler state."); + return {}; + } + + return std::unique_ptr(new MetalSampler([ss retain])); + } +} + +bool MetalDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) +{ + constexpr u32 src_layer = 0; + constexpr u32 src_level = 0; + + const u32 copy_size = width * texture->GetPixelSize(); + const u32 pitch = Common::AlignUpPow2(copy_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 required_size = pitch * height; + if (!CheckDownloadBufferSize(required_size)) + return false; + + MetalTexture* T = static_cast(texture); + CommitClear(T); + + @autoreleasepool + { + id encoder = GetBlitEncoder(true); + + [encoder copyFromTexture:T->GetMTLTexture() + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(x, y, 0) + sourceSize:MTLSizeMake(width, height, 1) + toBuffer:m_download_buffer + destinationOffset:0 + destinationBytesPerRow:pitch + destinationBytesPerImage:0]; + + SubmitCommandBuffer(true); + + StringUtil::StrideMemCpy(out_data, out_data_stride, 
[m_download_buffer contents], pitch, copy_size, height); + } + + return true; +} + +bool MetalDevice::CheckDownloadBufferSize(u32 required_size) +{ + if (m_download_buffer_size >= required_size) + return true; + + @autoreleasepool + { + // We don't need to defer releasing this one, it's not going to be used. + if (m_download_buffer != nil) + [m_download_buffer release]; + + constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceOptionCPUCacheModeDefault; + m_download_buffer = [[m_device newBufferWithLength:required_size options:options] retain]; + if (m_download_buffer == nil) + { + Log_ErrorPrintf("Failed to create %u byte download buffer", required_size); + m_download_buffer_size = 0; + return false; + } + + m_download_buffer_size = required_size; + } + + return true; +} + +bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const +{ + return (s_pixel_format_mapping[static_cast(format)] != MTLPixelFormatInvalid); +} + +void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) +{ + DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); + DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); + DebugAssert((src_y + height) <= src->GetMipHeight(src_level)); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); + + MetalTexture* D = static_cast(dst); + MetalTexture* S = static_cast(src); + + if (D->IsRenderTargetOrDepthStencil()) + { + if (S->GetState() == GPUTexture::State::Cleared) + { + if (S->GetWidth() == D->GetWidth() && S->GetHeight() == D->GetHeight()) + { + // pass clear through + D->m_state = S->m_state; + D->m_clear_value = S->m_clear_value; + return; + } + } + else if (S->GetState() == 
GPUTexture::State::Invalidated) + { + // Contents are undefined ;) + return; + } + else if (dst_x == 0 && dst_y == 0 && width == D->GetMipWidth(dst_level) && height == D->GetMipHeight(dst_level)) + { + D->SetState(GPUTexture::State::Dirty); + } + + CommitClear(D); + } + + CommitClear(S); + + @autoreleasepool + { + id encoder = GetBlitEncoder(true); + [encoder copyFromTexture:S->GetMTLTexture() + sourceSlice:src_level + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_x, src_y, 0) + sourceSize:MTLSizeMake(width, height, 1) + toTexture:D->GetMTLTexture() + destinationSlice:dst_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_x, dst_y, 0)]; + } +} + +void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) +{ +#if 0 + DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); + DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); + DebugAssert((src_y + height) <= src->GetMipHeight(src_level)); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); + DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + + // DX11 can't resolve partial rects. 
+ Assert(src_x == dst_x && src_y == dst_y); + + MetalTexture* dst11 = static_cast(dst); + MetalTexture* src11 = static_cast(src); + + src11->CommitClear(m_context.Get()); + dst11->CommitClear(m_context.Get()); + + m_context->ResolveSubresource(dst11->GetD3DTexture(), MetalCalcSubresource(dst_level, dst_layer, dst->GetLevels()), + src11->GetD3DTexture(), MetalCalcSubresource(src_level, src_layer, src->GetLevels()), + dst11->GetDXGIFormat()); +#else + Panic("Fixme"); +#endif +} + +void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c) +{ + GPUDevice::ClearRenderTarget(t, c); + if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t) + EndRenderPass(); +} + +void MetalDevice::ClearDepth(GPUTexture* t, float d) +{ + GPUDevice::ClearDepth(t, d); + if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t) + EndRenderPass(); +} + +void MetalDevice::InvalidateRenderTarget(GPUTexture* t) +{ + GPUDevice::InvalidateRenderTarget(t); + if (InRenderPass() && m_current_framebuffer && + (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) + { + EndRenderPass(); + } +} + +void MetalDevice::CommitClear(MetalTexture* tex) +{ + if (tex->GetState() == GPUTexture::State::Dirty) + return; + + DebugAssert(tex->IsRenderTargetOrDepthStencil()); + + if (tex->GetState() == GPUTexture::State::Cleared) + { + // TODO: We could combine it with the current render pass. + if (InRenderPass()) + EndRenderPass(); + + @autoreleasepool + { + // Allocating here seems a bit sad. 
+ MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; + desc.renderTargetWidth = tex->GetWidth(); + desc.renderTargetHeight = tex->GetHeight(); + if (tex->IsRenderTarget()) + { + const auto cc = tex->GetUNormClearColor(); + desc.colorAttachments[0].texture = tex->GetMTLTexture(); + desc.colorAttachments[0].loadAction = MTLLoadActionClear; + desc.colorAttachments[0].storeAction = MTLStoreActionStore; + desc.colorAttachments[0].clearColor = MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]); + } + else + { + desc.depthAttachment.texture = tex->GetMTLTexture(); + desc.depthAttachment.loadAction = MTLLoadActionClear; + desc.depthAttachment.storeAction = MTLStoreActionStore; + desc.depthAttachment.clearDepth = tex->GetClearDepth(); + } + + id encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc]; + [encoder endEncoding]; + } + } +} + +MetalTextureBuffer::MetalTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) +{ +} + +MetalTextureBuffer::~MetalTextureBuffer() +{ + if (m_buffer.IsValid()) + MetalDevice::GetInstance().UnbindTextureBuffer(this); + m_buffer.Destroy(); +} + +bool MetalTextureBuffer::CreateBuffer(id device) +{ + return m_buffer.Create(device, GetSizeInBytes()); +} + +void* MetalTextureBuffer::Map(u32 required_elements) +{ + const u32 esize = GetElementSize(m_format); + const u32 req_size = esize * required_elements; + if (!m_buffer.ReserveMemory(req_size, esize)) + { + MetalDevice::GetInstance().SubmitCommandBufferAndRestartRenderPass("out of space in texture buffer"); + if (!m_buffer.ReserveMemory(req_size, esize)) + Panic("Failed to allocate texture buffer space."); + } + + m_current_position = m_buffer.GetCurrentOffset() / esize; + return m_buffer.GetCurrentHostPointer(); +} + +void MetalTextureBuffer::Unmap(u32 used_elements) +{ + m_buffer.CommitMemory(GetElementSize(m_format) * used_elements); +} + +void MetalTextureBuffer::SetDebugName(const std::string_view& name) +{ + 
@autoreleasepool + { + [m_buffer.GetBuffer() setLabel:StringViewToNSString(name)]; + } +} + +std::unique_ptr MetalDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) +{ + std::unique_ptr tb = std::make_unique(format, size_in_elements); + if (!tb->CreateBuffer(m_device)) + tb.reset(); + + return tb; +} + +void MetalDevice::PushDebugGroup(const char* fmt, ...) +{ +} + +void MetalDevice::PopDebugGroup() +{ +} + +void MetalDevice::InsertDebugMessage(const char* fmt, ...) +{ +} + +void MetalDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) +{ + const u32 req_size = vertex_size * vertex_count; + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + { + SubmitCommandBufferAndRestartRenderPass("out of vertex space"); + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + Panic("Failed to allocate vertex space"); + } + + *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); + *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; + *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; +} + +void MetalDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) +{ + m_vertex_buffer.CommitMemory(vertex_size * vertex_count); +} + +void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) +{ + const u32 req_size = sizeof(DrawIndex) * index_count; + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + { + SubmitCommandBufferAndRestartRenderPass("out of index space"); + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + Panic("Failed to allocate index space"); + } + + *map_ptr = reinterpret_cast(m_index_buffer.GetCurrentHostPointer()); + *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); + *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); +} + +void MetalDevice::UnmapIndexBuffer(u32 used_index_count) +{ + 
m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); +} + +void MetalDevice::PushUniformBuffer(const void* data, u32 data_size) +{ + void* map = MapUniformBuffer(data_size); + std::memcpy(map, data, data_size); + UnmapUniformBuffer(data_size); +} + +void* MetalDevice::MapUniformBuffer(u32 size) +{ + const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); + if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) + { + SubmitCommandBufferAndRestartRenderPass("out of uniform space"); + if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) + Panic("Failed to allocate uniform space."); + } + + return m_uniform_buffer.GetCurrentHostPointer(); +} + +void MetalDevice::UnmapUniformBuffer(u32 size) +{ + m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); + m_uniform_buffer.CommitMemory(size); + if (InRenderPass()) + SetUniformBufferInRenderEncoder(); +} + +void MetalDevice::SetFramebuffer(GPUFramebuffer* fb) +{ + if (m_current_framebuffer == fb) + return; + + if (InRenderPass()) + EndRenderPass(); + + m_current_framebuffer = static_cast(fb); + + // Current pipeline might be incompatible, so unbind it. + // Otherwise it'll get bound to the new render encoder. 
+ // TODO: we shouldn't need to do this now + m_current_pipeline = nullptr; + m_current_depth_state = nil; +} + +void MetalDevice::UnbindFramebuffer(MetalFramebuffer* fb) +{ + if (m_current_framebuffer != fb) + return; + + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void MetalDevice::UnbindFramebuffer(MetalTexture* tex) +{ + if (!m_current_framebuffer) + return; + + if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) + return; + + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void MetalDevice::SetPipeline(GPUPipeline* pipeline) +{ + if (m_current_pipeline == pipeline) + return; + + m_current_pipeline = static_cast(pipeline); + if (InRenderPass()) + { + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + + if (m_current_depth_state != m_current_pipeline->GetDepthState()) + { + m_current_depth_state = m_current_pipeline->GetDepthState(); + [m_render_encoder setDepthStencilState:m_current_depth_state]; + } + if (m_current_cull_mode != m_current_pipeline->GetCullMode()) + { + m_current_cull_mode = m_current_pipeline->GetCullMode(); + [m_render_encoder setCullMode:m_current_cull_mode]; + } + } +} + +void MetalDevice::UnbindPipeline(MetalPipeline* pl) +{ + if (m_current_pipeline != pl) + return; + + m_current_pipeline = nullptr; +} + +void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) +{ + DebugAssert(slot < MAX_TEXTURE_SAMPLERS); + + id T = texture ? static_cast(texture)->GetMTLTexture() : nil; + id S = sampler ? 
static_cast(sampler)->GetSamplerState() : nil; + + if (m_current_textures[slot] != T) + { + m_current_textures[slot] = T; + if (InRenderPass()) + [m_render_encoder setFragmentTexture:T atIndex:slot]; + } + + if (m_current_samplers[slot] != S) + { + m_current_samplers[slot] = S; + if (InRenderPass()) + [m_render_encoder setFragmentSamplerState:S atIndex:slot]; + } +} + +void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) +{ + id B = buffer ? static_cast(buffer)->GetMTLBuffer() : nil; + if (m_current_ssbo == B) + return; + + m_current_ssbo = B; + if (InRenderPass()) + [m_render_encoder setFragmentBuffer:B offset:0 atIndex:1]; +} + +void MetalDevice::UnbindTexture(MetalTexture* tex) +{ + const id T = tex->GetMTLTexture(); + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == T) + { + m_current_textures[i] = nil; + if (InRenderPass()) + [m_render_encoder setFragmentTexture:nil atIndex:i]; + } + } +} + +void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf) +{ + if (m_current_ssbo != buf->GetMTLBuffer()) + return; + + m_current_ssbo = nil; + if (InRenderPass()) + [m_render_encoder setFragmentBuffer:nil offset:0 atIndex:1]; +} + +void MetalDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle new_vp = Common::Rectangle::FromExtents(x, y, width, height); + if (new_vp == m_current_viewport) + return; + + m_current_viewport = new_vp; + if (InRenderPass()) + SetViewportInRenderEncoder(); +} + +void MetalDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle new_sr = Common::Rectangle::FromExtents(x, y, width, height); + if (new_sr == m_current_scissor) + return; + + m_current_scissor = new_sr; + if (InRenderPass()) + SetScissorInRenderEncoder(); +} + +void MetalDevice::BeginRenderPass() +{ + DebugAssert(m_render_encoder == nil); + + // Inline writes :( + if (m_inline_upload_encoder != nil) + { + [m_inline_upload_encoder endEncoding]; + 
[m_inline_upload_encoder release]; + m_inline_upload_encoder = nil; + } + + MTLRenderPassDescriptor* desc; + if (!m_current_framebuffer) + { + // Rendering to view, but we got interrupted... + desc = [MTLRenderPassDescriptor renderPassDescriptor]; + desc.colorAttachments[0].texture = [m_layer_drawable texture]; + desc.colorAttachments[0].loadAction = MTLLoadActionLoad; + } + else + { + desc = m_current_framebuffer->GetDescriptor(); + } + + m_render_encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc]; + SetInitialEncoderState(); +} + +void MetalDevice::EndRenderPass() +{ + DebugAssert(InRenderPass() && !IsInlineUploading()); + [m_render_encoder endEncoding]; + [m_render_encoder release]; + m_render_encoder = nil; +} + +void MetalDevice::EndInlineUploading() +{ + DebugAssert(IsInlineUploading() && !InRenderPass()); + [m_inline_upload_encoder endEncoding]; + [m_inline_upload_encoder release]; + m_inline_upload_encoder = nil; +} + +void MetalDevice::EndAnyEncoding() +{ + if (InRenderPass()) + EndRenderPass(); + else if (IsInlineUploading()) + EndInlineUploading(); +} + +void MetalDevice::SetInitialEncoderState() +{ + // Set initial state. + // TODO: avoid uniform set here? it's probably going to get changed... + // Might be better off just deferring all the init until the first draw... 
+ SetUniformBufferInRenderEncoder(); + [m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1]; + [m_render_encoder setCullMode:m_current_cull_mode]; + if (m_current_depth_state != nil) + [m_render_encoder setDepthStencilState:m_current_depth_state]; + if (m_current_pipeline != nil) + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; + [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; + if (m_current_ssbo) + [m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1]; + SetViewportInRenderEncoder(); + SetScissorInRenderEncoder(); +} + +void MetalDevice::SetUniformBufferInRenderEncoder() +{ + [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; + [m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; +} + +void MetalDevice::SetViewportInRenderEncoder() +{ + const Common::Rectangle rc = ClampToFramebufferSize(m_current_viewport); + [m_render_encoder + setViewport:(MTLViewport){static_cast(rc.left), static_cast(rc.top), + static_cast(rc.GetWidth()), static_cast(rc.GetHeight()), 0.0, 1.0}]; +} + +void MetalDevice::SetScissorInRenderEncoder() +{ + const Common::Rectangle rc = ClampToFramebufferSize(m_current_scissor); + [m_render_encoder + setScissorRect:(MTLScissorRect){static_cast(rc.left), static_cast(rc.top), + static_cast(rc.GetWidth()), static_cast(rc.GetHeight())}]; +} + +Common::Rectangle MetalDevice::ClampToFramebufferSize(const Common::Rectangle& rc) const +{ + const s32 clamp_width = m_current_framebuffer ? m_current_framebuffer->GetWidth() : m_window_info.surface_width; + const s32 clamp_height = m_current_framebuffer ? 
m_current_framebuffer->GetHeight() : m_window_info.surface_height; + return rc.ClampedSize(clamp_width, clamp_height); +} + +void MetalDevice::PreDrawCheck() +{ + if (!InRenderPass()) + BeginRenderPass(); +} + +void MetalDevice::Draw(u32 vertex_count, u32 base_vertex) +{ + PreDrawCheck(); + [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count]; +} + +void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) +{ + PreDrawCheck(); + + const u32 index_offset = base_index * sizeof(u16); + [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() + indexCount:index_count + indexType:MTLIndexTypeUInt16 + indexBuffer:m_index_buffer.GetBuffer() + indexBufferOffset:index_offset + instanceCount:1 + baseVertex:base_vertex + baseInstance:0]; +} + +id MetalDevice::GetBlitEncoder(bool is_inline) +{ + @autoreleasepool + { + if (!is_inline) + { + if (!m_upload_cmdbuf) + { + m_upload_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; + m_upload_encoder = [[m_upload_cmdbuf blitCommandEncoder] retain]; + [m_upload_encoder setLabel:@"Upload Encoder"]; + } + return m_upload_encoder; + } + + // Interleaved with draws. + if (m_inline_upload_encoder != nil) + return m_inline_upload_encoder; + + if (InRenderPass()) + EndRenderPass(); + m_inline_upload_encoder = [[m_render_cmdbuf blitCommandEncoder] retain]; + return m_inline_upload_encoder; + } +} + +bool MetalDevice::BeginPresent(bool skip_present) +{ + @autoreleasepool + { + if (skip_present || m_layer == nil) + return false; + + EndAnyEncoding(); + + m_layer_drawable = [[m_layer nextDrawable] retain]; + if (m_layer_drawable == nil) + return false; + + SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); + + // Set up rendering to layer. 
+ id layer_texture = [m_layer_drawable texture]; + m_current_framebuffer = nullptr; + m_layer_pass_desc.colorAttachments[0].texture = layer_texture; + m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; + m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain]; + m_current_pipeline = nullptr; + m_current_depth_state = nil; + SetInitialEncoderState(); + return true; + } +} + +void MetalDevice::EndPresent() +{ + DebugAssert(!m_current_framebuffer); + EndAnyEncoding(); + + [m_render_cmdbuf presentDrawable:m_layer_drawable]; + [m_layer_drawable release]; + m_layer_drawable = nil; + SubmitCommandBuffer(); +} + +void MetalDevice::CreateCommandBuffer() +{ + @autoreleasepool + { + DebugAssert(m_render_cmdbuf == nil); + const u64 fence_counter = ++m_current_fence_counter; + m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; + [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id) { + CommandBufferCompletedOffThread(fence_counter); + }]; + } + + CleanupObjects(); +} + +void MetalDevice::CommandBufferCompletedOffThread(u64 fence_counter) +{ + std::unique_lock lock(m_fence_mutex); + m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter), + std::memory_order_release); +} + +void MetalDevice::SubmitCommandBuffer(bool wait_for_completion) +{ + if (m_upload_cmdbuf != nil) + { + [m_upload_encoder endEncoding]; + [m_upload_encoder release]; + m_upload_encoder = nil; + [m_upload_cmdbuf commit]; + [m_upload_cmdbuf release]; + m_upload_cmdbuf = nil; + } + + if (m_render_cmdbuf != nil) + { + if (InRenderPass()) + EndRenderPass(); + else if (IsInlineUploading()) + EndInlineUploading(); + + [m_render_cmdbuf commit]; + + if (wait_for_completion) + [m_render_cmdbuf waitUntilCompleted]; + + [m_render_cmdbuf release]; + m_render_cmdbuf = nil; + } + + CreateCommandBuffer(); +} + +void 
MetalDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason) +{ + Log_DevPrintf("Submitting command buffer and restarting render pass due to %s", reason); + + const bool in_render_pass = InRenderPass(); + SubmitCommandBuffer(); + if (in_render_pass) + BeginRenderPass(); +} + +void MetalDevice::WaitForFenceCounter(u64 counter) +{ + if (m_completed_fence_counter.load(std::memory_order_relaxed) >= counter) + return; + + // TODO: There has to be a better way to do this.. + std::unique_lock lock(m_fence_mutex); + while (m_completed_fence_counter.load(std::memory_order_acquire) < counter) + { + lock.unlock(); + pthread_yield_np(); + lock.lock(); + } + + CleanupObjects(); +} + +void MetalDevice::WaitForPreviousCommandBuffers() +{ + // Early init? + if (m_current_fence_counter == 0) + return; + + WaitForFenceCounter(m_current_fence_counter - 1); +} + +void MetalDevice::CleanupObjects() +{ + const u64 counter = m_completed_fence_counter.load(std::memory_order_acquire); + while (m_cleanup_objects.size() > 0 && m_cleanup_objects.front().first <= counter) + { + [m_cleanup_objects.front().second release]; + m_cleanup_objects.pop_front(); + } +} + +void MetalDevice::DeferRelease(id obj) +{ + MetalDevice& dev = GetInstance(); + dev.m_cleanup_objects.emplace_back(dev.m_current_fence_counter, obj); +} + +void MetalDevice::DeferRelease(u64 fence_counter, id obj) +{ + MetalDevice& dev = GetInstance(); + dev.m_cleanup_objects.emplace_back(fence_counter, obj); +} + +std::unique_ptr GPUDevice::WrapNewMetalDevice() +{ + return std::unique_ptr(new MetalDevice()); +} + +GPUDevice::AdapterAndModeList GPUDevice::WrapGetMetalAdapterAndModeList() +{ + return MetalDevice::StaticGetAdapterAndModeList(); +} diff --git a/src/util/metal_stream_buffer.h b/src/util/metal_stream_buffer.h new file mode 100644 index 000000000..65ccfa54f --- /dev/null +++ b/src/util/metal_stream_buffer.h @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 
OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "common/types.h" + +#include +#include + +#ifndef __OBJC__ +#error This file needs to be compiled with Objective C++. +#endif + +#if __has_feature(objc_arc) +#error ARC should not be enabled. +#endif + +#include +#include + +class MetalStreamBuffer +{ +public: + MetalStreamBuffer(); + MetalStreamBuffer(MetalStreamBuffer&& move) = delete; + MetalStreamBuffer(const MetalStreamBuffer&) = delete; + ~MetalStreamBuffer(); + + MetalStreamBuffer& operator=(MetalStreamBuffer&& move) = delete; + MetalStreamBuffer& operator=(const MetalStreamBuffer&) = delete; + + ALWAYS_INLINE bool IsValid() const { return (m_buffer != nil); } + ALWAYS_INLINE id GetBuffer() const { return m_buffer; } + ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; } + ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } + ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; } + ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } + ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } + + bool Create(id device, u32 size); + void Destroy(); + + bool ReserveMemory(u32 num_bytes, u32 alignment); + void CommitMemory(u32 final_num_bytes); + +private: + bool AllocateBuffer(u32 size); + void UpdateCurrentFencePosition(); + void UpdateGPUPosition(); + + // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
+ bool WaitForClearSpace(u32 num_bytes); + + u32 m_size = 0; + u32 m_current_offset = 0; + u32 m_current_space = 0; + u32 m_current_gpu_position = 0; + + id m_buffer = nil; + u8* m_host_pointer = nullptr; + + // List of fences and the corresponding positions in the buffer + std::deque> m_tracked_fences; +}; diff --git a/src/util/metal_stream_buffer.mm b/src/util/metal_stream_buffer.mm new file mode 100644 index 000000000..7d6bce3b6 --- /dev/null +++ b/src/util/metal_stream_buffer.mm @@ -0,0 +1,255 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "metal_stream_buffer.h" +#include "metal_device.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/log.h" + +Log_SetChannel(MetalDevice); + +MetalStreamBuffer::MetalStreamBuffer() = default; + +MetalStreamBuffer::~MetalStreamBuffer() +{ + if (IsValid()) + Destroy(); +} + +// Allocates a new shared, write-combined buffer of `size` bytes on `device`, replacing any existing buffer. +// Returns false (leaving the current buffer untouched) if the allocation fails. +bool MetalStreamBuffer::Create(id device, u32 size) +{ + @autoreleasepool + { + const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + + id new_buffer = [device newBufferWithLength:size options:options]; + if (new_buffer == nil) + { + Log_ErrorPrintf("Failed to create buffer."); + return false; + } + + if (IsValid()) + Destroy(); + + // Replace with the new buffer + m_size = size; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + // -newBufferWithLength:options: already hands back an owned reference under manual retain/release, so + // taking another retain here would never be balanced by the single release in Destroy(). + m_buffer = new_buffer; + m_host_pointer = static_cast([new_buffer contents]); + return true; + } +} + +void MetalStreamBuffer::Destroy() +{ + m_size = 0; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + [m_buffer release]; + m_buffer = nil; + m_host_pointer = nullptr; +} + +bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) +{ + const u32 required_bytes = num_bytes + alignment; + + // Check for sane allocations + if (required_bytes > m_size) + { + Log_ErrorPrintf("Attempting to allocate 
%u bytes from a %u byte stream buffer", static_cast(num_bytes), + static_cast(m_size)); + Panic("Stream buffer overflow"); + return false; + } + + UpdateGPUPosition(); + + // Is the GPU behind or up to date with our current offset? + if (m_current_offset >= m_current_gpu_position) + { + const u32 remaining_bytes = m_size - m_current_offset; + if (required_bytes <= remaining_bytes) + { + // Place at the current position, after the GPU position. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_size - m_current_offset; + return true; + } + + // Check for space at the start of the buffer + // We use < here because we don't want to have the case of m_current_offset == + // m_current_gpu_position. That would mean the code above would assume the + // GPU has caught up to us, which it hasn't. + if (required_bytes < m_current_gpu_position) + { + // Reset offset to zero, since we're allocating behind the gpu now + m_current_offset = 0; + m_current_space = m_current_gpu_position - 1; + return true; + } + } + + // Is the GPU ahead of our current offset? + if (m_current_offset < m_current_gpu_position) + { + // We have from m_current_offset..m_current_gpu_position space to use. + const u32 remaining_bytes = m_current_gpu_position - m_current_offset; + if (required_bytes < remaining_bytes) + { + // Place at the current position, since this is still behind the GPU. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_current_gpu_position - m_current_offset - 1; + return true; + } + } + + // Can we find a fence to wait on that will give us enough memory? + if (WaitForClearSpace(required_bytes)) + { + const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset; + m_current_offset += align_diff; + m_current_space -= align_diff; + return true; + } + + // We tried everything we could, and still couldn't get anything. 
This means that too much space + // in the buffer is being used by the command buffer currently being recorded. Therefore, the + // only option is to execute it, and wait until it's done. + return false; +} + +void MetalStreamBuffer::CommitMemory(u32 final_num_bytes) +{ + DebugAssert((m_current_offset + final_num_bytes) <= m_size); + DebugAssert(final_num_bytes <= m_current_space); + + m_current_offset += final_num_bytes; + m_current_space -= final_num_bytes; + UpdateCurrentFencePosition(); +} + +void MetalStreamBuffer::UpdateCurrentFencePosition() +{ + // Has the offset changed since the last fence? + const u64 counter = MetalDevice::GetCurrentFenceCounter(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) + { + // Still haven't executed a command buffer, so just update the offset. + m_tracked_fences.back().second = m_current_offset; + return; + } + + // New buffer, so update the GPU position while we're at it. + m_tracked_fences.emplace_back(counter, m_current_offset); +} + +void MetalStreamBuffer::UpdateGPUPosition() +{ + auto start = m_tracked_fences.begin(); + auto end = start; + + const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); + while (end != m_tracked_fences.end() && completed_counter >= end->first) + { + m_current_gpu_position = end->second; + ++end; + } + + if (start != end) + { + m_tracked_fences.erase(start, end); + if (m_current_offset == m_current_gpu_position) + { + // GPU is all caught up now. + m_current_offset = 0; + m_current_gpu_position = 0; + m_current_space = m_size; + } + } +} + +bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes) +{ + u32 new_offset = 0; + u32 new_space = 0; + u32 new_gpu_position = 0; + + auto iter = m_tracked_fences.begin(); + for (; iter != m_tracked_fences.end(); ++iter) + { + // Would this fence bring us in line with the GPU? 
+ // This is the "last resort" case, where a command buffer execution has been forced + // after no additional data has been written to it, so we can assume that after the + // fence has been signaled the entire buffer is now consumed. + u32 gpu_position = iter->second; + if (m_current_offset == gpu_position) + { + new_offset = 0; + new_space = m_size; + new_gpu_position = 0; + break; + } + + // Assuming that we wait for this fence, are we allocating in front of the GPU? + if (m_current_offset > gpu_position) + { + // This would suggest the GPU has now followed us and wrapped around, so we have from + // m_current_position..m_size free, as well as and 0..gpu_position. + const u32 remaining_space_after_offset = m_size - m_current_offset; + if (remaining_space_after_offset >= num_bytes) + { + // Switch to allocating in front of the GPU, using the remainder of the buffer. + new_offset = m_current_offset; + new_space = m_size - m_current_offset; + new_gpu_position = gpu_position; + break; + } + + // We can wrap around to the start, behind the GPU, if there is enough space. + // We use > here because otherwise we'd end up lining up with the GPU, and then the + // allocator would assume that the GPU has consumed what we just wrote. + if (gpu_position > num_bytes) + { + new_offset = 0; + new_space = gpu_position - 1; + new_gpu_position = gpu_position; + break; + } + } + else + { + // We're currently allocating behind the GPU. This would give us between the current + // offset and the GPU position worth of space to work with. Again, > because we can't + // align the GPU position with the buffer offset. + u32 available_space_inbetween = gpu_position - m_current_offset; + if (available_space_inbetween > num_bytes) + { + // Leave the offset as-is, but update the GPU position. + new_offset = m_current_offset; + new_space = available_space_inbetween - 1; + new_gpu_position = gpu_position; + break; + } + } + } + + // Did any fences satisfy this condition? 
+ // Has the command buffer been executed yet? If not, the caller should execute it. + if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter()) + return false; + + // Wait until this fence is signaled. This will fire the callback, updating the GPU position. + MetalDevice::GetInstance().WaitForFenceCounter(iter->first); + m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); + m_current_offset = new_offset; + m_current_space = new_space; + m_current_gpu_position = new_gpu_position; + return true; +} diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp new file mode 100644 index 000000000..61e54a004 --- /dev/null +++ b/src/util/opengl_device.cpp @@ -0,0 +1,1062 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "opengl_device.h" +#include "opengl_pipeline.h" +#include "opengl_stream_buffer.h" +#include "opengl_texture.h" +#include "postprocessing_chain.h" // TODO: Remove me + +#include "core/host.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/log.h" +#include "common/string_util.h" + +#include "fmt/format.h" + +#include +#include + +Log_SetChannel(OpenGLDevice); + +OpenGLDevice::OpenGLDevice() +{ + // Something which won't be matched.. + std::memset(&m_last_rasterization_state, 0xFF, sizeof(m_last_rasterization_state)); + std::memset(&m_last_depth_state, 0xFF, sizeof(m_last_depth_state)); + std::memset(&m_last_blend_state, 0xFF, sizeof(m_last_blend_state)); +} + +OpenGLDevice::~OpenGLDevice() +{ + Assert(!m_gl_context); + Assert(!m_pipeline_disk_cache_file); +} + +void OpenGLDevice::BindUpdateTextureUnit() +{ + GetInstance().SetActiveTexture(UPDATE_TEXTURE_UNIT - GL_TEXTURE0); +} + +RenderAPI OpenGLDevice::GetRenderAPI() const +{ + return m_gl_context->IsGLES() ? 
RenderAPI::OpenGLES : RenderAPI::OpenGL; +} + +std::unique_ptr OpenGLDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data, u32 data_stride, bool dynamic /* = false */) +{ + std::unique_ptr tex(std::make_unique()); + if (!tex->Create(width, height, layers, levels, samples, type, format, data, data_stride)) + tex.reset(); + + return tex; +} + +bool OpenGLDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) +{ + OpenGLTexture* T = static_cast(texture); + + GLint alignment; + if (out_data_stride & 1) + alignment = 1; + else if (out_data_stride & 2) + alignment = 2; + else + alignment = 4; + + glPixelStorei(GL_PACK_ALIGNMENT, alignment); + glPixelStorei(GL_PACK_ROW_LENGTH, out_data_stride / T->GetPixelSize()); + + const auto [gl_internal_format, gl_format, gl_type] = OpenGLTexture::GetPixelFormatMapping(T->GetFormat()); + const u32 layer = 0; + const u32 level = 0; + + if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image) + { + glGetTextureSubImage(T->GetGLId(), level, x, y, layer, width, height, 1, gl_format, gl_type, + height * out_data_stride, out_data); + } + else + { + if (T->GetLayers() > 1) + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, T->GetGLId(), level, layer); + else + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, T->GetGLTarget(), T->GetGLId(), level); + + DebugAssert(glCheckFramebufferStatus(GL_READ_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + glReadPixels(x, y, width, height, gl_format, gl_type, out_data); + + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + } + + return true; +} + +bool OpenGLDevice::SupportsTextureFormat(GPUTexture::Format format) const +{ + const auto [gl_internal_format, gl_format, gl_type] = OpenGLTexture::GetPixelFormatMapping(format); + return (gl_internal_format != 
static_cast(0)); +} + +// Copies a width x height region from (src_x, src_y) of src's layer/level to (dst_x, dst_y) of dst's layer/level. +// Uses glCopyImageSubData (core, EXT or OES variant) when available, otherwise falls back to a framebuffer blit. +void OpenGLDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) +{ + OpenGLTexture* D = static_cast(dst); + OpenGLTexture* S = static_cast(src); + CommitClear(D); + CommitClear(S); + + const GLuint sid = S->GetGLId(); + const GLuint did = D->GetGLId(); + if (GLAD_GL_VERSION_4_3 || GLAD_GL_ARB_copy_image) + { + glCopyImageSubData(sid, GL_TEXTURE_2D, src_level, src_x, src_y, src_layer, did, GL_TEXTURE_2D, dst_level, dst_x, + dst_y, dst_layer, width, height, 1); + } + else if (GLAD_GL_EXT_copy_image) + { + glCopyImageSubDataEXT(sid, GL_TEXTURE_2D, src_level, src_x, src_y, src_layer, did, GL_TEXTURE_2D, dst_level, dst_x, + dst_y, dst_layer, width, height, 1); + } + else if (GLAD_GL_OES_copy_image) + { + glCopyImageSubDataOES(sid, GL_TEXTURE_2D, src_level, src_x, src_y, src_layer, did, GL_TEXTURE_2D, dst_level, dst_x, + dst_y, dst_layer, width, height, 1); + } + else + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_read_fbo); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); + if (D->IsTextureArray()) + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, did, dst_level, dst_layer); + else + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, D->GetGLTarget(), did, dst_level); + // The blit reads from GL_READ_FRAMEBUFFER, so the source must be attached there; attaching it to the draw + // framebuffer would replace the destination attachment made just above. + if (S->IsTextureArray()) + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, sid, src_level, src_layer); + else + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, S->GetGLTarget(), sid, src_level); + + // Source and destination rectangles are both width x height, so no scaling takes place. + glDisable(GL_SCISSOR_TEST); + glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + glEnable(GL_SCISSOR_TEST); + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? 
m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } +} + +void OpenGLDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) +{ + OpenGLTexture* D = static_cast(dst); + OpenGLTexture* S = static_cast(src); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_read_fbo); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); + if (D->IsTextureArray()) + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, D->GetGLId(), dst_level, dst_layer); + else + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, D->GetGLTarget(), D->GetGLId(), dst_level); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, S->GetGLTarget(), S->GetGLId(), 0); + + CommitClear(S); + if (width == D->GetMipWidth(dst_level) && height == D->GetMipHeight(dst_level)) + { + D->SetState(GPUTexture::State::Dirty); + if (glInvalidateFramebuffer) + { + const GLenum attachment = GL_COLOR_ATTACHMENT0; + glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); + } + } + else + { + CommitClear(D); + } + + glDisable(GL_SCISSOR_TEST); + glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + glEnable(GL_SCISSOR_TEST); + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? 
m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); +} + +void OpenGLDevice::ClearRenderTarget(GPUTexture* t, u32 c) +{ + GPUDevice::ClearRenderTarget(t, c); + if (m_current_framebuffer && m_current_framebuffer->GetRT() == t) + CommitClear(m_current_framebuffer); +} + +void OpenGLDevice::ClearDepth(GPUTexture* t, float d) +{ + GPUDevice::ClearDepth(t, d); + if (m_current_framebuffer && m_current_framebuffer->GetDS() == t) + CommitClear(m_current_framebuffer); +} + +void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t) +{ + GPUDevice::InvalidateRenderTarget(t); + if (m_current_framebuffer && (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) + CommitClear(m_current_framebuffer); +} + +void OpenGLDevice::PushDebugGroup(const char* fmt, ...) +{ +#ifdef _DEBUG + if (!glPushDebugGroup) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + if (!buf.empty()) + glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, static_cast(buf.size()), buf.c_str()); +#endif +} + +void OpenGLDevice::PopDebugGroup() +{ +#ifdef _DEBUG + if (!glPopDebugGroup) + return; + + glPopDebugGroup(); +#endif +} + +void OpenGLDevice::InsertDebugMessage(const char* fmt, ...) 
+{ +#ifdef _DEBUG + if (!glDebugMessageInsert) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + if (!buf.empty()) + { + glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, GL_DEBUG_TYPE_OTHER, 0, GL_DEBUG_SEVERITY_NOTIFICATION, + static_cast(buf.size()), buf.c_str()); + } +#endif +} + +void OpenGLDevice::SetVSync(bool enabled) +{ + if (m_vsync_enabled == enabled) + return; + + m_vsync_enabled = enabled; + SetSwapInterval(); +} + +static void APIENTRY GLDebugCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, + const GLchar* message, const void* userParam) +{ + switch (severity) + { + case GL_DEBUG_SEVERITY_HIGH_KHR: + Log_ErrorPrint(message); + break; + case GL_DEBUG_SEVERITY_MEDIUM_KHR: + Log_WarningPrint(message); + break; + case GL_DEBUG_SEVERITY_LOW_KHR: + Log_InfoPrint(message); + break; + case GL_DEBUG_SEVERITY_NOTIFICATION: + // Log_DebugPrint(message); + break; + } +} + +bool OpenGLDevice::HasSurface() const +{ + return m_window_info.type != WindowInfo::Type::Surfaceless; +} + +bool OpenGLDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation) +{ + m_gl_context = GL::Context::Create(m_window_info); + if (!m_gl_context) + { + Log_ErrorPrintf("Failed to create any GL context"); + m_gl_context.reset(); + return false; + } + + // Is this needed? + m_window_info = m_gl_context->GetWindowInfo(); + + const bool opengl_is_available = + ((!m_gl_context->IsGLES() && (GLAD_GL_VERSION_3_0 || GLAD_GL_ARB_uniform_buffer_object)) || + (m_gl_context->IsGLES() && GLAD_GL_ES_VERSION_3_1)); + if (!opengl_is_available) + { + Host::ReportErrorAsync(TRANSLATE_SV("GPUDevice", "Error"), + TRANSLATE_SV("GPUDevice", "OpenGL renderer unavailable, your driver or hardware is not " + "recent enough. 
OpenGL 3.1 or OpenGL ES 3.1 is required.")); + m_gl_context.reset(); + return false; + } + + SetSwapInterval(); + if (HasSurface()) + RenderBlankFrame(); + + if (m_debug_device && GLAD_GL_KHR_debug) + { + if (m_gl_context->IsGLES()) + glDebugMessageCallbackKHR(GLDebugCallback, nullptr); + else + glDebugMessageCallback(GLDebugCallback, nullptr); + + glEnable(GL_DEBUG_OUTPUT); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + } + else + { + // Nail the function pointers so that we don't waste time calling them. + glPushDebugGroup = nullptr; + glPopDebugGroup = nullptr; + glDebugMessageInsert = nullptr; + glObjectLabel = nullptr; + } + + bool buggy_pbo; + if (!CheckFeatures(&buggy_pbo)) + return false; + + if (!CreateBuffers(buggy_pbo)) + return false; + + return true; +} + +bool OpenGLDevice::CheckFeatures(bool* buggy_pbo) +{ + const bool is_gles = m_gl_context->IsGLES(); + + // bool vendor_id_amd = false; + // bool vendor_id_nvidia = false; + // bool vendor_id_intel = false; + bool vendor_id_arm = false; + bool vendor_id_qualcomm = false; + bool vendor_id_powervr = false; + + const char* vendor = (const char*)glGetString(GL_VENDOR); + const char* renderer = (const char*)glGetString(GL_RENDERER); + if (std::strstr(vendor, "Advanced Micro Devices") || std::strstr(vendor, "ATI Technologies Inc.") || + std::strstr(vendor, "ATI")) + { + Log_InfoPrint("AMD GPU detected."); + // vendor_id_amd = true; + } + else if (std::strstr(vendor, "NVIDIA Corporation")) + { + Log_InfoPrint("NVIDIA GPU detected."); + // vendor_id_nvidia = true; + } + else if (std::strstr(vendor, "Intel")) + { + Log_InfoPrint("Intel GPU detected."); + // vendor_id_intel = true; + } + else if (std::strstr(vendor, "ARM")) + { + Log_InfoPrint("ARM GPU detected."); + vendor_id_arm = true; + } + else if (std::strstr(vendor, "Qualcomm")) + { + Log_InfoPrint("Qualcomm GPU detected."); + vendor_id_qualcomm = true; + } + else if (std::strstr(vendor, "Imagination Technologies") || std::strstr(renderer, "PowerVR")) + { + 
Log_InfoPrint("PowerVR GPU detected."); + vendor_id_powervr = true; + } + + // Don't use PBOs when we don't have ARB_buffer_storage, orphaning buffers probably ends up worse than just + // using the normal texture update routines and letting the driver take care of it. PBOs are also completely + // broken on mobile drivers. + const bool is_shitty_mobile_driver = (vendor_id_powervr || vendor_id_qualcomm || vendor_id_arm); + const bool is_buggy_pbo = + (!GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage) || is_shitty_mobile_driver; + *buggy_pbo = true;// is_buggy_pbo; + if (is_buggy_pbo && !is_shitty_mobile_driver) + Log_WarningPrint("Not using PBOs for texture uploads because buffer_storage is unavailable."); + + GLint max_texture_size = 1024; + GLint max_samples = 1; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); + glGetIntegerv(GL_MAX_SAMPLES, &max_samples); + m_max_texture_size = std::max(1024u, static_cast(max_texture_size)); + m_max_multisamples = std::max(1u, static_cast(max_samples)); + + GLint max_dual_source_draw_buffers = 0; + glGetIntegerv(GL_MAX_DUAL_SOURCE_DRAW_BUFFERS, &max_dual_source_draw_buffers); + m_features.dual_source_blend = + (max_dual_source_draw_buffers > 0) && + (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended || GLAD_GL_EXT_blend_func_extended); + +#ifdef __APPLE__ + // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver. + m_features.supports_texture_buffers = false; +#else + m_features.supports_texture_buffers = (GLAD_GL_VERSION_3_1 || GLAD_GL_ES_VERSION_3_2); + + // And Samsung's ANGLE/GLES driver? + if (std::strstr(reinterpret_cast(glGetString(GL_RENDERER)), "ANGLE")) + m_features.supports_texture_buffers = false; +#endif + + if (!m_features.supports_texture_buffers) + { + // Try SSBOs. 
+ GLint max_fragment_storage_blocks = 0; + GLint64 max_ssbo_size = 0; + if (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1 || GLAD_GL_ARB_shader_storage_buffer_object) + { + glGetIntegerv(GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, &max_fragment_storage_blocks); + glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size); + } + + Log_InfoPrintf("Max fragment shader storage blocks: %d", max_fragment_storage_blocks); + Log_InfoPrintf("Max shader storage buffer size: %" PRId64, max_ssbo_size); + m_features.texture_buffers_emulated_with_ssbo = + (max_fragment_storage_blocks > 0 && max_ssbo_size >= static_cast(1024 * 512 * sizeof(u16))); + if (m_features.texture_buffers_emulated_with_ssbo) + { + Log_InfoPrintf("Using shader storage buffers for VRAM writes."); + m_features.supports_texture_buffers = true; + } + else + { + Host::ReportErrorAsync( + TRANSLATE_SV("GPUDevice", "Error"), + TRANSLATE_SV("Error", "Both texture buffers and SSBOs are not supported, or are of inadequate size.")); + return false; + } + } + + m_features.per_sample_shading = GLAD_GL_VERSION_4_0 || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_ARB_sample_shading; + + // noperspective is not supported in GLSL ES. 
+ m_features.noperspective_interpolation = !is_gles; + + m_features.gpu_timing = !(m_gl_context->IsGLES() && + (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)); + m_features.partial_msaa_resolve = true; + + m_features.shader_cache = false; + + m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary; + if (m_features.pipeline_cache) + { + // check that there's at least one format and the extension isn't being "faked" + GLint num_formats = 0; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + Log_InfoPrintf("%u program binary formats supported by driver", num_formats); + m_features.pipeline_cache = (num_formats > 0); + } + + if (!m_features.pipeline_cache) + { + Log_WarningPrintf("Your GL driver does not support program binaries. Hopefully it has a built-in cache, otherwise " + "startup will be slow due to compiling shaders."); + } + + return true; +} + +void OpenGLDevice::DestroyDevice() +{ + if (!m_gl_context) + return; + + ClosePipelineCache(); + DestroyBuffers(); + + m_gl_context->DoneCurrent(); + m_gl_context.reset(); +} + +bool OpenGLDevice::UpdateWindow() +{ + Assert(m_gl_context); + + DestroySurface(); + + if (!AcquireWindow(false)) + return false; + + if (!m_gl_context->ChangeSurface(m_window_info)) + { + Log_ErrorPrintf("Failed to change surface"); + return false; + } + + m_window_info = m_gl_context->GetWindowInfo(); + + if (m_window_info.type != WindowInfo::Type::Surfaceless) + { + // reset vsync rate, since it (usually) gets lost + SetSwapInterval(); + RenderBlankFrame(); + } + + return true; +} + +void OpenGLDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) +{ + m_window_info.surface_scale = new_window_scale; + if (m_window_info.surface_width == static_cast(new_window_width) && + m_window_info.surface_height == static_cast(new_window_height)) + { + return; + } + + m_gl_context->ResizeSurface(static_cast(new_window_width), 
static_cast(new_window_height)); + m_window_info = m_gl_context->GetWindowInfo(); +} + +std::string OpenGLDevice::GetDriverInfo() const +{ + const char* gl_vendor = reinterpret_cast(glGetString(GL_VENDOR)); + const char* gl_renderer = reinterpret_cast(glGetString(GL_RENDERER)); + const char* gl_version = reinterpret_cast(glGetString(GL_VERSION)); + const char* gl_shading_language_version = reinterpret_cast(glGetString(GL_SHADING_LANGUAGE_VERSION)); + return fmt::format("OpenGL Context:\n{}\n{} {}\nGLSL: {}", gl_version, gl_vendor, gl_renderer, + gl_shading_language_version); +} + +void OpenGLDevice::SetSwapInterval() +{ + if (m_window_info.type == WindowInfo::Type::Surfaceless) + return; + + // Window framebuffer has to be bound to call SetSwapInterval. + const s32 interval = m_vsync_enabled ? 1 : 0; + GLint current_fbo = 0; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, ¤t_fbo); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + + if (!m_gl_context->SetSwapInterval(interval)) + Log_WarningPrintf("Failed to set swap interval to %d", interval); + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, current_fbo); +} + +void OpenGLDevice::RenderBlankFrame() +{ + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + glDisable(GL_SCISSOR_TEST); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + m_gl_context->SwapBuffers(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? 
m_current_framebuffer->GetGLId() : 0); + glEnable(GL_SCISSOR_TEST); +} + +GPUDevice::AdapterAndModeList OpenGLDevice::GetAdapterAndModeList() +{ + AdapterAndModeList aml; + + if (m_gl_context) + { + for (const GL::Context::FullscreenModeInfo& fmi : m_gl_context->EnumerateFullscreenModes()) + { + aml.fullscreen_modes.push_back(GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); + } + } + + return aml; +} + +void OpenGLDevice::DestroySurface() +{ + if (!m_gl_context) + return; + + m_window_info.SetSurfaceless(); + if (!m_gl_context->ChangeSurface(m_window_info)) + Log_ErrorPrintf("Failed to switch to surfaceless"); +} + +bool OpenGLDevice::CreateBuffers(bool buggy_pbo) +{ + if (!(m_vertex_buffer = OpenGLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE)) || + !(m_index_buffer = OpenGLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE)) || + !(m_uniform_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE))) + { + Log_ErrorPrintf("Failed to create one or more device buffers."); + return false; + } + + GL_OBJECT_NAME(m_vertex_buffer, "Device Vertex Buffer"); + GL_OBJECT_NAME(m_index_buffer, "Device Index Buffer"); + GL_OBJECT_NAME(m_uniform_buffer, "Device Uniform Buffer"); + + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); + + if (!buggy_pbo) + { + if (!(m_texture_stream_buffer = OpenGLStreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_STREAM_BUFFER_SIZE))) + { + Log_ErrorPrintf("Failed to create texture stream buffer"); + return false; + } + + // Need to unbind otherwise normal uploads will fail. 
+ m_texture_stream_buffer->Unbind(); + + GL_OBJECT_NAME(m_texture_stream_buffer, "Device Texture Stream Buffer"); + } + + GLuint fbos[2]; + glGetError(); + glGenFramebuffers(static_cast(std::size(fbos)), fbos); + if (const GLenum err = glGetError(); err != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create framebuffers: %u", err); + return false; + } + m_read_fbo = fbos[0]; + m_write_fbo = fbos[1]; + + // Read FBO gets left bound. + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_read_fbo); + + return true; +} + +void OpenGLDevice::DestroyBuffers() +{ + if (m_write_fbo != 0) + glDeleteFramebuffers(1, &m_write_fbo); + if (m_read_fbo != 0) + glDeleteFramebuffers(1, &m_read_fbo); + m_texture_stream_buffer.reset(); + m_uniform_buffer.reset(); + m_index_buffer.reset(); + m_vertex_buffer.reset(); +} + +bool OpenGLDevice::BeginPresent(bool skip_present) +{ + if (skip_present || m_window_info.type == WindowInfo::Type::Surfaceless) + { + if (!skip_present) + glFlush(); + return false; + } + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + glDisable(GL_SCISSOR_TEST); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glEnable(GL_SCISSOR_TEST); + + const Common::Rectangle window_rc = + Common::Rectangle::FromExtents(0, 0, m_window_info.surface_width, m_window_info.surface_height); + m_current_framebuffer = nullptr; + m_last_viewport = window_rc; + m_last_scissor = window_rc; + UpdateViewport(); + UpdateScissor(); + return true; +} + +void OpenGLDevice::EndPresent() +{ + DebugAssert(!m_current_framebuffer); + + if (m_gpu_timing_enabled) + PopTimestampQuery(); + + m_gl_context->SwapBuffers(); + + if (m_gpu_timing_enabled) + KickTimestampQuery(); +} + +void OpenGLDevice::CreateTimestampQueries() +{ + const bool gles = m_gl_context->IsGLES(); + const auto GenQueries = gles ? 
glGenQueriesEXT : glGenQueries; + + GenQueries(static_cast(m_timestamp_queries.size()), m_timestamp_queries.data()); + KickTimestampQuery(); +} + +void OpenGLDevice::DestroyTimestampQueries() +{ + if (m_timestamp_queries[0] == 0) + return; + + const bool gles = m_gl_context->IsGLES(); + const auto DeleteQueries = gles ? glDeleteQueriesEXT : glDeleteQueries; + + if (m_timestamp_query_started) + { + const auto EndQuery = gles ? glEndQueryEXT : glEndQuery; + EndQuery(GL_TIME_ELAPSED); + } + + DeleteQueries(static_cast(m_timestamp_queries.size()), m_timestamp_queries.data()); + m_timestamp_queries.fill(0); + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = false; +} + +void OpenGLDevice::PopTimestampQuery() +{ + const bool gles = m_gl_context->IsGLES(); + + if (gles) + { + GLint disjoint = 0; + glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); + if (disjoint) + { + Log_VerbosePrintf("GPU timing disjoint, resetting."); + if (m_timestamp_query_started) + glEndQueryEXT(GL_TIME_ELAPSED); + + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = false; + } + } + + while (m_waiting_timestamp_queries > 0) + { + const auto GetQueryObjectiv = gles ? glGetQueryObjectivEXT : glGetQueryObjectiv; + const auto GetQueryObjectui64v = gles ? glGetQueryObjectui64vEXT : glGetQueryObjectui64v; + + GLint available = 0; + GetQueryObjectiv(m_timestamp_queries[m_read_timestamp_query], GL_QUERY_RESULT_AVAILABLE, &available); + if (!available) + break; + + u64 result = 0; + GetQueryObjectui64v(m_timestamp_queries[m_read_timestamp_query], GL_QUERY_RESULT, &result); + m_accumulated_gpu_time += static_cast(static_cast(result) / 1000000.0); + m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_waiting_timestamp_queries--; + } + + if (m_timestamp_query_started) + { + const auto EndQuery = gles ? 
glEndQueryEXT : glEndQuery; + EndQuery(GL_TIME_ELAPSED); + + m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_timestamp_query_started = false; + m_waiting_timestamp_queries++; + } +} + +void OpenGLDevice::KickTimestampQuery() +{ + if (m_timestamp_query_started || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) + return; + + const bool gles = m_gl_context->IsGLES(); + const auto BeginQuery = gles ? glBeginQueryEXT : glBeginQuery; + + BeginQuery(GL_TIME_ELAPSED, m_timestamp_queries[m_write_timestamp_query]); + m_timestamp_query_started = true; +} + +bool OpenGLDevice::SetGPUTimingEnabled(bool enabled) +{ + if (m_gpu_timing_enabled == enabled) + return true; + else if (!m_features.gpu_timing) + return false; + + m_gpu_timing_enabled = enabled; + if (m_gpu_timing_enabled) + CreateTimestampQueries(); + else + DestroyTimestampQueries(); + + return true; +} + +float OpenGLDevice::GetAndResetAccumulatedGPUTime() +{ + const float value = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return value; +} + +void OpenGLDevice::SetActiveTexture(u32 slot) +{ + if (m_last_texture_unit != slot) + { + m_last_texture_unit = slot; + glActiveTexture(GL_TEXTURE0 + slot); + } +} + +// Clears every cached sampler slot that currently references texture `id` and unbinds GL_TEXTURE_2D on that unit. +void OpenGLDevice::UnbindTexture(GLuint id) +{ + for (u32 slot = 0; slot < MAX_TEXTURE_SAMPLERS; slot++) + { + auto& ss = m_last_samplers[slot]; + if (ss.first == id) + { + ss.first = 0; + + // m_last_texture_unit caches the slot index (see SetActiveTexture), not the GL_TEXTUREn enum, so route the + // switch through SetActiveTexture() to keep the cached value comparable. + SetActiveTexture(slot); + glBindTexture(GL_TEXTURE_2D, 0); + } + } +} + +void OpenGLDevice::UnbindSSBO(GLuint id) +{ + if (m_last_ssbo != id) + return; + + m_last_ssbo = 0; + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); +} + +void OpenGLDevice::UnbindSampler(GLuint id) +{ + for (u32 slot = 0; slot < MAX_TEXTURE_SAMPLERS; slot++) + { + auto& ss = m_last_samplers[slot]; + if (ss.second == id) + { + ss.second = 0; + glBindSampler(slot, 0); + } + } +} + 
+void OpenGLDevice::UnbindFramebuffer(const OpenGLFramebuffer* fb) +{ + if (m_current_framebuffer == fb) + { + m_current_framebuffer = nullptr; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + } +} + +void OpenGLDevice::UnbindPipeline(const OpenGLPipeline* pl) +{ + if (m_current_pipeline == pl) + { + m_current_pipeline = nullptr; + glUseProgram(0); + } +} + +void OpenGLDevice::Draw(u32 vertex_count, u32 base_vertex) +{ + glDrawArrays(m_current_pipeline->GetTopology(), base_vertex, vertex_count); +} + +void OpenGLDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) +{ + const void* indices = reinterpret_cast(static_cast(base_index) * sizeof(u16)); + glDrawElementsBaseVertex(m_current_pipeline->GetTopology(), index_count, GL_UNSIGNED_SHORT, indices, base_vertex); +} + +void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) +{ + const auto res = m_vertex_buffer->Map(vertex_size, vertex_size * vertex_count); + *map_ptr = res.pointer; + *map_space = res.space_aligned; + *map_base_vertex = res.index_aligned; +} + +void OpenGLDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) +{ + m_vertex_buffer->Unmap(vertex_size * vertex_count); +} + +void OpenGLDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) +{ + const auto res = m_index_buffer->Map(sizeof(DrawIndex), sizeof(DrawIndex) * index_count); + *map_ptr = static_cast(res.pointer); + *map_space = res.space_aligned; + *map_base_index = res.index_aligned; +} + +void OpenGLDevice::UnmapIndexBuffer(u32 used_index_count) +{ + m_index_buffer->Unmap(sizeof(DrawIndex) * used_index_count); +} + +void OpenGLDevice::PushUniformBuffer(const void* data, u32 data_size) +{ + const auto res = m_uniform_buffer->Map(m_uniform_buffer_alignment, data_size); + std::memcpy(res.pointer, data, data_size); + m_uniform_buffer->Unmap(data_size); + glBindBufferRange(GL_UNIFORM_BUFFER, 1, 
m_uniform_buffer->GetGLBufferId(), res.buffer_offset, data_size); +} + +void* OpenGLDevice::MapUniformBuffer(u32 size) +{ + const auto res = m_uniform_buffer->Map(m_uniform_buffer_alignment, size); + return res.pointer; +} + +void OpenGLDevice::UnmapUniformBuffer(u32 size) +{ + const u32 pos = m_uniform_buffer->Unmap(size); + glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), pos, size); +} + +void OpenGLDevice::SetFramebuffer(GPUFramebuffer* fb) +{ + if (m_current_framebuffer == fb) + return; + + OpenGLFramebuffer* FB = static_cast(fb); + const bool prev_was_window = (m_current_framebuffer == nullptr); + const bool new_is_window = (FB == nullptr); + m_current_framebuffer = FB; + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FB ? FB->GetGLId() : 0); + if (prev_was_window != new_is_window) + { + UpdateViewport(); + UpdateScissor(); + } +} + +void OpenGLDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) +{ + DebugAssert(slot < MAX_TEXTURE_SAMPLERS); + auto& sslot = m_last_samplers[slot]; + + const OpenGLTexture* T = static_cast(texture); + const GLuint Tid = T ? T->GetGLId() : 0; + if (sslot.first != Tid) + { + sslot.first = Tid; + + SetActiveTexture(slot); + glBindTexture(T ? T->GetGLTarget() : GL_TEXTURE_2D, T ? T->GetGLId() : 0); + } + + const GLuint Sid = sampler ? static_cast(sampler)->GetID() : 0; + if (sslot.second != Sid) + { + sslot.second = Sid; + glBindSampler(slot, Sid); + } +} + +void OpenGLDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) +{ + const OpenGLTextureBuffer* B = static_cast(buffer); + if (!m_features.texture_buffers_emulated_with_ssbo) + { + const GLuint Tid = B ? B->GetTextureId() : 0; + if (m_last_samplers[slot].first != Tid) + { + m_last_samplers[slot].first = Tid; + SetActiveTexture(slot); + glBindTexture(GL_TEXTURE_BUFFER, Tid); + } + } + else + { + DebugAssert(slot == 0); + const GLuint bid = B ? 
B->GetBuffer()->GetGLBufferId() : 0; + if (m_last_ssbo == bid) + return; + + m_last_ssbo = bid; + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot, bid); + } +} + +void OpenGLDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); + if (m_last_viewport == rc) + return; + + m_last_viewport = rc; + UpdateViewport(); +} + +void OpenGLDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); + if (m_last_scissor == rc) + return; + + m_last_scissor = rc; + UpdateScissor(); +} + +std::tuple OpenGLDevice::GetFlippedViewportScissor(const Common::Rectangle& rc) const +{ + // Only when rendering to window framebuffer. + // We draw everything else upside-down. + s32 x, y, width, height; + if (!m_current_framebuffer) + { + const s32 sh = static_cast(m_window_info.surface_height); + const s32 rh = rc.GetHeight(); + x = rc.left; + y = sh - rc.top - rh; + width = rc.GetWidth(); + height = rh; + } + else + { + x = rc.left; + y = rc.top; + width = rc.GetWidth(); + height = rc.GetHeight(); + } + return std::tie(x, y, width, height); +} + +void OpenGLDevice::UpdateViewport() +{ + const auto& [x, y, width, height] = GetFlippedViewportScissor(m_last_viewport); + glViewport(x, y, width, height); +} + +void OpenGLDevice::UpdateScissor() +{ + const auto& [x, y, width, height] = GetFlippedViewportScissor(m_last_scissor); + glScissor(x, y, width, height); +} diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h new file mode 100644 index 000000000..de7f0bb12 --- /dev/null +++ b/src/util/opengl_device.h @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gl/context.h" +#include "gpu_device.h" +#include "gpu_shader_cache.h" +#include "opengl_loader.h" +#include "opengl_pipeline.h" +#include 
"opengl_texture.h" + +#include "common/rectangle.h" + +#include +#include +#include + +class OpenGLFramebuffer; +class OpenGLPipeline; +class OpenGLStreamBuffer; +class OpenGLTexture; + +class OpenGLDevice final : public GPUDevice +{ +public: + OpenGLDevice(); + ~OpenGLDevice(); + + ALWAYS_INLINE static OpenGLDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } + ALWAYS_INLINE static OpenGLStreamBuffer* GetTextureStreamBuffer() + { + return GetInstance().m_texture_stream_buffer.get(); + } + static void BindUpdateTextureUnit(); + + ALWAYS_INLINE GL::Context* GetGLContext() const { return m_gl_context.get(); } + + RenderAPI GetRenderAPI() const override; + + bool HasSurface() const override; + void DestroySurface() override; + + bool UpdateWindow() override; + void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; + + std::string GetDriverInfo() const override; + + AdapterAndModeList GetAdapterAndModeList() override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + + bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; + void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 width, u32 height) override; + void 
ClearRenderTarget(GPUTexture* t, u32 c) override; + void ClearDepth(GPUTexture* t, float d) override; + void InvalidateRenderTarget(GPUTexture* t) override; + + std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; + + std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, DynamicHeapArray* out_binary) override; + std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size) override; + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + + void SetVSync(bool enabled) override; + + bool BeginPresent(bool skip_present) override; + void EndPresent() override; + + bool SetGPUTimingEnabled(bool enabled) override; + float 
GetAndResetAccumulatedGPUTime() override; + + void CommitClear(OpenGLTexture* tex); + void CommitClear(OpenGLFramebuffer* fb); // Assumes the FB has been bound. + + GLuint LookupProgramCache(const OpenGLPipeline::ProgramCacheKey& key, const GPUPipeline::GraphicsConfig& plconfig); + GLuint CompileProgram(const GPUPipeline::GraphicsConfig& plconfig); + void PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig, GLuint program_id); + void UnrefProgram(const OpenGLPipeline::ProgramCacheKey& key); + + GLuint LookupVAOCache(const OpenGLPipeline::VertexArrayCacheKey& key); + GLuint CreateVAO(gsl::span attributes, u32 stride); + void UnrefVAO(const OpenGLPipeline::VertexArrayCacheKey& key); + + void SetActiveTexture(u32 slot); + void UnbindTexture(GLuint id); + void UnbindSSBO(GLuint id); + void UnbindSampler(GLuint id); + void UnbindFramebuffer(const OpenGLFramebuffer* fb); + void UnbindPipeline(const OpenGLPipeline* pl); + +protected: + bool CreateDevice(const std::string_view& adapter, bool threaded_presentation) override; + void DestroyDevice() override; + + bool ReadPipelineCache(const std::string& filename) override; + bool GetPipelineCacheData(DynamicHeapArray* data) override; + +private: + static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; + + static constexpr GLenum UPDATE_TEXTURE_UNIT = GL_TEXTURE8; + + static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; + static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; + + bool CheckFeatures(bool* buggy_pbo); + bool CreateBuffers(bool buggy_pbo); + void DestroyBuffers(); + + void SetSwapInterval(); + void RenderBlankFrame(); + + std::tuple GetFlippedViewportScissor(const Common::Rectangle& rc) const; + void UpdateViewport(); + void UpdateScissor(); + + void CreateTimestampQueries(); + void DestroyTimestampQueries(); + void PopTimestampQuery(); + void KickTimestampQuery(); + + 
GLuint CreateProgramFromPipelineCache(const OpenGLPipeline::ProgramCacheItem& it, + const GPUPipeline::GraphicsConfig& plconfig); + void AddToPipelineCache(OpenGLPipeline::ProgramCacheItem* it); + bool DiscardPipelineCache(); + void ClosePipelineCache(); + + std::unique_ptr m_gl_context; + std::unique_ptr m_window_framebuffer; + + std::unique_ptr m_vertex_buffer; + std::unique_ptr m_index_buffer; + std::unique_ptr m_uniform_buffer; + std::unique_ptr m_texture_stream_buffer; + + // TODO: pass in file instead of blob for pipeline cache + OpenGLPipeline::VertexArrayCache m_vao_cache; + OpenGLPipeline::ProgramCache m_program_cache; + + // VAO cache - fixed max as key + GPUPipeline::BlendState m_last_blend_state = {}; + GPUPipeline::RasterizationState m_last_rasterization_state = {}; + GPUPipeline::DepthState m_last_depth_state = {}; + GLuint m_uniform_buffer_alignment = 1; + GLuint m_last_program = 0; + GLuint m_last_vao = 0; + u32 m_last_texture_unit = 0; + std::array, MAX_TEXTURE_SAMPLERS> m_last_samplers = {}; + GLuint m_last_ssbo = 0; + Common::Rectangle m_last_viewport{0, 0, 1, 1}; + Common::Rectangle m_last_scissor{0, 0, 1, 1}; + + // Misc framebuffers + GLuint m_read_fbo = 0; + GLuint m_write_fbo = 0; + + OpenGLFramebuffer* m_current_framebuffer = nullptr; + OpenGLPipeline* m_current_pipeline = nullptr; + + std::array m_timestamp_queries = {}; + float m_accumulated_gpu_time = 0.0f; + u8 m_read_timestamp_query = 0; + u8 m_write_timestamp_query = 0; + u8 m_waiting_timestamp_queries = 0; + bool m_timestamp_query_started = false; + + std::FILE* m_pipeline_disk_cache_file = nullptr; + std::string m_pipeline_disk_cache_filename; + u32 m_pipeline_disk_cache_data_end = 0; + bool m_pipeline_disk_cache_changed = false; +}; diff --git a/src/util/opengl_host_display.cpp b/src/util/opengl_host_display.cpp deleted file mode 100644 index 187fd4259..000000000 --- a/src/util/opengl_host_display.cpp +++ /dev/null @@ -1,1142 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor 
McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "opengl_host_display.h" -#include "common/align.h" -#include "common/assert.h" -#include "common/log.h" -#include "common/string_util.h" -#include "core/settings.h" -#include "imgui.h" -#include "imgui_impl_opengl3.h" -#include "postprocessing_shadergen.h" -#include -#include -Log_SetChannel(OpenGLHostDisplay); - -enum : u32 -{ - TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024, -}; - -OpenGLHostDisplay::OpenGLHostDisplay() = default; - -OpenGLHostDisplay::~OpenGLHostDisplay() -{ - if (!m_gl_context) - return; - - DestroyResources(); - - m_gl_context->DoneCurrent(); - m_gl_context.reset(); -} - -RenderAPI OpenGLHostDisplay::GetRenderAPI() const -{ - return m_gl_context->IsGLES() ? RenderAPI::OpenGLES : RenderAPI::OpenGL; -} - -void* OpenGLHostDisplay::GetDevice() const -{ - return nullptr; -} - -void* OpenGLHostDisplay::GetContext() const -{ - return m_gl_context.get(); -} - -std::unique_ptr OpenGLHostDisplay::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, - u32 data_stride, bool dynamic /* = false */) -{ - std::unique_ptr tex(std::make_unique()); - if (!tex->Create(width, height, layers, levels, samples, format, data, data_stride)) - tex.reset(); - - return tex; -} - -bool OpenGLHostDisplay::BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, - u32* out_pitch) -{ - const u32 pixel_size = texture->GetPixelSize(); - const u32 stride = Common::AlignUpPow2(width * pixel_size, 4); - const u32 size_required = stride * height; - GL::StreamBuffer* buffer = UsePBOForUploads() ? 
GetTextureStreamBuffer() : nullptr; - - if (buffer && size_required < buffer->GetSize()) - { - auto map = buffer->Map(4096, size_required); - m_texture_stream_buffer_offset = map.buffer_offset; - *out_buffer = map.pointer; - *out_pitch = stride; - } - else - { - std::vector& repack_buffer = GetTextureRepackBuffer(); - if (repack_buffer.size() < size_required) - repack_buffer.resize(size_required); - - *out_buffer = repack_buffer.data(); - *out_pitch = stride; - } - - return true; -} - -void OpenGLHostDisplay::EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) -{ - const u32 pixel_size = texture->GetPixelSize(); - const u32 stride = Common::AlignUpPow2(width * pixel_size, 4); - const u32 size_required = stride * height; - GL::Texture* gl_texture = static_cast(texture); - GL::StreamBuffer* buffer = UsePBOForUploads() ? GetTextureStreamBuffer() : nullptr; - - const auto [gl_internal_format, gl_format, gl_type] = GL::Texture::GetPixelFormatMapping(gl_texture->GetFormat()); - const bool whole_texture = (!gl_texture->UseTextureStorage() && x == 0 && y == 0 && width == gl_texture->GetWidth() && - height == gl_texture->GetHeight()); - - gl_texture->Bind(); - if (buffer && size_required < buffer->GetSize()) - { - buffer->Unmap(size_required); - buffer->Bind(); - - if (whole_texture) - { - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, - reinterpret_cast(static_cast(m_texture_stream_buffer_offset))); - } - else - { - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, - reinterpret_cast(static_cast(m_texture_stream_buffer_offset))); - } - - buffer->Unbind(); - } - else - { - std::vector& repack_buffer = GetTextureRepackBuffer(); - if (whole_texture) - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, repack_buffer.data()); - else - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, repack_buffer.data()); - } -} - -bool 
OpenGLHostDisplay::UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, - u32 pitch) -{ - GL::Texture* gl_texture = static_cast(texture); - const auto [gl_internal_format, gl_format, gl_type] = GL::Texture::GetPixelFormatMapping(gl_texture->GetFormat()); - const u32 pixel_size = gl_texture->GetPixelSize(); - const bool is_packed_tightly = (pitch == (pixel_size * width)); - - const bool whole_texture = (!gl_texture->UseTextureStorage() && x == 0 && y == 0 && width == gl_texture->GetWidth() && - height == gl_texture->GetHeight()); - gl_texture->Bind(); - - // If we have GLES3, we can set row_length. - if (UseGLES3DrawPath() || is_packed_tightly) - { - if (!is_packed_tightly) - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size); - - if (whole_texture) - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, data); - else - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, data); - - if (!is_packed_tightly) - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - else - { - // Otherwise, we need to repack the image. 
- std::vector& repack_buffer = GetTextureRepackBuffer(); - const u32 packed_pitch = width * pixel_size; - const u32 repack_size = packed_pitch * height; - if (repack_buffer.size() < repack_size) - repack_buffer.resize(repack_size); - - StringUtil::StrideMemCpy(repack_buffer.data(), packed_pitch, data, pitch, packed_pitch, height); - - if (whole_texture) - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, repack_buffer.data()); - else - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, repack_buffer.data()); - } - - return true; -} - -bool OpenGLHostDisplay::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - GLint alignment; - if (out_data_stride & 1) - alignment = 1; - else if (out_data_stride & 2) - alignment = 2; - else - alignment = 4; - - GLint old_alignment = 0, old_row_length = 0; - glGetIntegerv(GL_PACK_ALIGNMENT, &old_alignment); - glPixelStorei(GL_PACK_ALIGNMENT, alignment); - if (!m_use_gles2_draw_path) - { - glGetIntegerv(GL_PACK_ROW_LENGTH, &old_row_length); - glPixelStorei(GL_PACK_ROW_LENGTH, out_data_stride / texture->GetPixelSize()); - } - - const auto [gl_internal_format, gl_format, gl_type] = GL::Texture::GetPixelFormatMapping(texture->GetFormat()); - - GL::Texture::GetTextureSubImage(static_cast(texture)->GetGLId(), 0, x, y, 0, width, height, 1, - gl_format, gl_type, height * out_data_stride, out_data); - - glPixelStorei(GL_PACK_ALIGNMENT, old_alignment); - if (!m_use_gles2_draw_path) - glPixelStorei(GL_PACK_ROW_LENGTH, old_row_length); - return true; -} - -bool OpenGLHostDisplay::SupportsTextureFormat(GPUTexture::Format format) const -{ - const auto [gl_internal_format, gl_format, gl_type] = GL::Texture::GetPixelFormatMapping(format); - return (gl_internal_format != static_cast(0)); -} - -void OpenGLHostDisplay::SetVSync(bool enabled) -{ - if (m_vsync_enabled == enabled) - return; - - m_vsync_enabled = enabled; - 
SetSwapInterval(); -} - -const char* OpenGLHostDisplay::GetGLSLVersionString() const -{ - if (GetRenderAPI() == RenderAPI::OpenGLES) - { - if (GLAD_GL_ES_VERSION_3_0) - return "#version 300 es"; - else - return "#version 100"; - } - else - { - if (GLAD_GL_VERSION_3_3) - return "#version 330"; - else - return "#version 130"; - } -} - -std::string OpenGLHostDisplay::GetGLSLVersionHeader() const -{ - std::string header = GetGLSLVersionString(); - header += "\n\n"; - if (GetRenderAPI() == RenderAPI::OpenGLES) - { - header += "precision highp float;\n"; - header += "precision highp int;\n\n"; - } - - return header; -} - -static void APIENTRY GLDebugCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, - const GLchar* message, const void* userParam) -{ - switch (severity) - { - case GL_DEBUG_SEVERITY_HIGH_KHR: - Log_ErrorPrint(message); - break; - case GL_DEBUG_SEVERITY_MEDIUM_KHR: - Log_WarningPrint(message); - break; - case GL_DEBUG_SEVERITY_LOW_KHR: - Log_InfoPrint(message); - break; - case GL_DEBUG_SEVERITY_NOTIFICATION: - // Log_DebugPrint(message); - break; - } -} - -bool OpenGLHostDisplay::HasDevice() const -{ - return static_cast(m_gl_context); -} - -bool OpenGLHostDisplay::HasSurface() const -{ - return m_window_info.type != WindowInfo::Type::Surfaceless; -} - -bool OpenGLHostDisplay::CreateDevice(const WindowInfo& wi, bool vsync) -{ - m_gl_context = GL::Context::Create(wi); - if (!m_gl_context) - { - Log_ErrorPrintf("Failed to create any GL context"); - m_gl_context.reset(); - return false; - } - - m_window_info = m_gl_context->GetWindowInfo(); - m_vsync_enabled = vsync; - return true; -} - -bool OpenGLHostDisplay::SetupDevice() -{ - // If we don't have GLES3.1, then SV_VertexID isn't defined when no VBOs are active. 
- m_use_gles2_draw_path = (GetRenderAPI() == RenderAPI::OpenGLES && !GLAD_GL_ES_VERSION_3_1); - if (!m_use_gles2_draw_path) - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast(&m_uniform_buffer_alignment)); - - // Doubt GLES2 drivers will support PBOs efficiently. - m_use_pbo_for_pixels = !m_use_gles2_draw_path; - if (GetRenderAPI() == RenderAPI::OpenGLES) - { - // Adreno seems to corrupt textures through PBOs... and Mali is slow. - const char* gl_vendor = reinterpret_cast(glGetString(GL_VENDOR)); - if (std::strstr(gl_vendor, "Qualcomm") || std::strstr(gl_vendor, "ARM") || std::strstr(gl_vendor, "Broadcom")) - m_use_pbo_for_pixels = false; - } - - Log_VerbosePrintf("Using GLES2 draw path: %s", m_use_gles2_draw_path ? "yes" : "no"); - Log_VerbosePrintf("Using PBO for streaming: %s", m_use_pbo_for_pixels ? "yes" : "no"); - - if (g_settings.gpu_use_debug_device && GLAD_GL_KHR_debug) - { - if (GetRenderAPI() == RenderAPI::OpenGLES) - glDebugMessageCallbackKHR(GLDebugCallback, nullptr); - else - glDebugMessageCallback(GLDebugCallback, nullptr); - - glEnable(GL_DEBUG_OUTPUT); - glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); - } - - if (!CreateResources()) - return false; - - return true; -} - -bool OpenGLHostDisplay::MakeCurrent() -{ - if (!m_gl_context->MakeCurrent()) - { - Log_ErrorPrintf("Failed to make GL context current"); - return false; - } - - SetSwapInterval(); - return true; -} - -bool OpenGLHostDisplay::DoneCurrent() -{ - return m_gl_context->DoneCurrent(); -} - -bool OpenGLHostDisplay::ChangeWindow(const WindowInfo& new_wi) -{ - Assert(m_gl_context); - - if (!m_gl_context->ChangeSurface(new_wi)) - { - Log_ErrorPrintf("Failed to change surface"); - return false; - } - - m_window_info = m_gl_context->GetWindowInfo(); - - // Update swap interval for new surface. 
- if (m_gl_context->IsCurrent()) - SetSwapInterval(); - - return true; -} - -void OpenGLHostDisplay::ResizeWindow(s32 new_window_width, s32 new_window_height) -{ - if (!m_gl_context) - return; - - m_gl_context->ResizeSurface(static_cast(new_window_width), static_cast(new_window_height)); - m_window_info = m_gl_context->GetWindowInfo(); -} - -void OpenGLHostDisplay::SetSwapInterval() -{ - if (m_window_info.type == WindowInfo::Type::Surfaceless) - return; - - // Window framebuffer has to be bound to call SetSwapInterval. - const s32 interval = m_vsync_enabled ? 1 : 0; - GLint current_fbo = 0; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, ¤t_fbo); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); - - if (!m_gl_context->SetSwapInterval(interval)) - Log_WarningPrintf("Failed to set swap interval to %d", interval); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, current_fbo); -} - -bool OpenGLHostDisplay::SupportsFullscreen() const -{ - return false; -} - -bool OpenGLHostDisplay::IsFullscreen() -{ - return false; -} - -bool OpenGLHostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) -{ - return false; -} - -HostDisplay::AdapterAndModeList OpenGLHostDisplay::GetAdapterAndModeList() -{ - AdapterAndModeList aml; - - if (m_gl_context) - { - for (const GL::Context::FullscreenModeInfo& fmi : m_gl_context->EnumerateFullscreenModes()) - { - aml.fullscreen_modes.push_back(GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); - } - } - - return aml; -} - -void OpenGLHostDisplay::DestroySurface() -{ - if (!m_gl_context) - return; - - m_window_info.SetSurfaceless(); - if (!m_gl_context->ChangeSurface(m_window_info)) - Log_ErrorPrintf("Failed to switch to surfaceless"); -} - -bool OpenGLHostDisplay::CreateImGuiContext() -{ - return ImGui_ImplOpenGL3_Init(GetGLSLVersionString()); -} - -void OpenGLHostDisplay::DestroyImGuiContext() -{ - ImGui_ImplOpenGL3_Shutdown(); -} - -bool OpenGLHostDisplay::UpdateImGuiFontTexture() -{ - 
ImGui_ImplOpenGL3_DestroyFontsTexture(); - return ImGui_ImplOpenGL3_CreateFontsTexture(); -} - -bool OpenGLHostDisplay::CreateResources() -{ - if (!m_use_gles2_draw_path) - { - static constexpr char fullscreen_quad_vertex_shader[] = R"( -uniform vec4 u_src_rect; -out vec2 v_tex0; - -void main() -{ - vec2 pos = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2)); - v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; - gl_Position = vec4(pos * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); -} -)"; - - static constexpr char display_fragment_shader[] = R"( -uniform sampler2D samp0; - -in vec2 v_tex0; -out vec4 o_col0; - -void main() -{ - o_col0 = vec4(texture(samp0, v_tex0).rgb, 1.0); -} -)"; - - static constexpr char cursor_fragment_shader[] = R"( -uniform sampler2D samp0; - -in vec2 v_tex0; -out vec4 o_col0; - -void main() -{ - o_col0 = texture(samp0, v_tex0); -} -)"; - - if (!m_display_program.Compile(GetGLSLVersionHeader() + fullscreen_quad_vertex_shader, - GetGLSLVersionHeader() + display_fragment_shader) || - !m_cursor_program.Compile(GetGLSLVersionHeader() + fullscreen_quad_vertex_shader, - GetGLSLVersionHeader() + cursor_fragment_shader)) - { - Log_ErrorPrintf("Failed to compile display shaders"); - return false; - } - - if (GetRenderAPI() != RenderAPI::OpenGLES) - { - m_display_program.BindFragData(0, "o_col0"); - m_cursor_program.BindFragData(0, "o_col0"); - } - - if (!m_display_program.Link() || !m_cursor_program.Link()) - { - Log_ErrorPrintf("Failed to link display programs"); - return false; - } - - m_display_program.Bind(); - m_display_program.RegisterUniform("u_src_rect"); - m_display_program.RegisterUniform("samp0"); - m_display_program.Uniform1i(1, 0); - m_cursor_program.Bind(); - m_cursor_program.RegisterUniform("u_src_rect"); - m_cursor_program.RegisterUniform("samp0"); - m_cursor_program.Uniform1i(1, 0); - - glGenVertexArrays(1, &m_display_vao); - - // samplers - glGenSamplers(1, &m_display_nearest_sampler); - 
glSamplerParameteri(m_display_nearest_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(m_display_nearest_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glGenSamplers(1, &m_display_linear_sampler); - glSamplerParameteri(m_display_linear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(m_display_linear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glGenSamplers(1, &m_display_border_sampler); - glSamplerParameteri(m_display_border_sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(m_display_border_sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - - // If we don't have border clamp.. too bad, just hope for the best. - if (!m_gl_context->IsGLES() || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_NV_texture_border_clamp || - GLAD_GL_EXT_texture_border_clamp || GLAD_GL_OES_texture_border_clamp) - { - static constexpr const float border_color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; - - glSamplerParameteri(m_display_border_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); - glSamplerParameteri(m_display_border_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); - glSamplerParameterfv(m_display_border_sampler, GL_TEXTURE_BORDER_COLOR, border_color); - } - } - else - { - static constexpr char fullscreen_quad_vertex_shader[] = R"( -#version 100 - -attribute highp vec2 a_pos; -attribute highp vec2 a_tex0; -varying highp vec2 v_tex0; - -void main() -{ - gl_Position = vec4(a_pos, 0.0, 1.0); - v_tex0 = a_tex0; -} -)"; - - static constexpr char display_fragment_shader[] = R"( -#version 100 - -uniform highp sampler2D samp0; - -varying highp vec2 v_tex0; - -void main() -{ - gl_FragColor = vec4(texture2D(samp0, v_tex0).rgb, 1.0); -} -)"; - - static constexpr char cursor_fragment_shader[] = R"( -#version 100 - -uniform highp sampler2D samp0; - -varying highp vec2 v_tex0; - -void main() -{ - gl_FragColor = texture2D(samp0, v_tex0); -} -)"; - - if (!m_display_program.Compile(fullscreen_quad_vertex_shader, display_fragment_shader) || - 
!m_cursor_program.Compile(fullscreen_quad_vertex_shader, cursor_fragment_shader)) - { - Log_ErrorPrintf("Failed to compile display shaders"); - return false; - } - - m_display_program.BindAttribute(0, "a_pos"); - m_display_program.BindAttribute(1, "a_tex0"); - m_cursor_program.BindAttribute(0, "a_pos"); - m_cursor_program.BindAttribute(1, "a_tex0"); - - if (!m_display_program.Link() || !m_cursor_program.Link()) - { - Log_ErrorPrintf("Failed to link display programs"); - return false; - } - - m_display_program.Bind(); - m_display_program.RegisterUniform("samp0"); - m_display_program.Uniform1i(0, 0); - m_cursor_program.Bind(); - m_cursor_program.RegisterUniform("samp0"); - m_cursor_program.Uniform1i(0, 0); - } - - return true; -} - -void OpenGLHostDisplay::DestroyResources() -{ - HostDisplay::DestroyResources(); - - m_post_processing_chain.ClearStages(); - m_post_processing_input_texture.Destroy(); - m_post_processing_ubo.reset(); - m_post_processing_stages.clear(); - - if (m_display_vao != 0) - { - glDeleteVertexArrays(1, &m_display_vao); - m_display_vao = 0; - } - if (m_display_border_sampler != 0) - { - glDeleteSamplers(1, &m_display_border_sampler); - m_display_border_sampler = 0; - } - if (m_display_linear_sampler != 0) - { - glDeleteSamplers(1, &m_display_linear_sampler); - m_display_linear_sampler = 0; - } - if (m_display_nearest_sampler != 0) - { - glDeleteSamplers(1, &m_display_nearest_sampler); - m_display_nearest_sampler = 0; - } - - m_cursor_program.Destroy(); - m_display_program.Destroy(); -} - -bool OpenGLHostDisplay::Render(bool skip_present) -{ - if (skip_present || m_window_info.type == WindowInfo::Type::Surfaceless) - { - if (ImGui::GetCurrentContext()) - ImGui::Render(); - - return false; - } - - glDisable(GL_SCISSOR_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - - RenderDisplay(); - - if (ImGui::GetCurrentContext()) - RenderImGui(); - - RenderSoftwareCursor(); - - if (m_gpu_timing_enabled) - PopTimestampQuery(); - - m_gl_context->SwapBuffers(); 
- - if (m_gpu_timing_enabled) - KickTimestampQuery(); - - return true; -} - -bool OpenGLHostDisplay::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - GL::Texture texture; - if (!texture.Create(width, height, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0) || !texture.CreateFramebuffer()) - { - return false; - } - - glDisable(GL_SCISSOR_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(texture.GetGLFramebufferID(), draw_rect.left, - height - draw_rect.top - draw_rect.GetHeight(), draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, width, height); - } - else - { - texture.BindFramebuffer(GL_FRAMEBUFFER); - glClear(GL_COLOR_BUFFER_BIT); - - if (HasDisplayTexture()) - { - RenderDisplay(draw_rect.left, height - draw_rect.top - draw_rect.GetHeight(), draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - IsUsingLinearFiltering()); - } - } - - out_pixels->resize(width * height); - *out_stride = sizeof(u32) * width; - *out_format = GPUTexture::Format::RGBA8; - glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, out_pixels->data()); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - return true; -} - -void OpenGLHostDisplay::RenderImGui() -{ - ImGui::Render(); - ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); - GL::Program::ResetLastProgram(); -} - -void OpenGLHostDisplay::RenderDisplay() -{ - const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - 
ApplyPostProcessingChain(0, left, GetWindowHeight() - top - height, width, height, - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - GetWindowWidth(), GetWindowHeight()); - return; - } - - glBindFramebuffer(GL_FRAMEBUFFER, 0); - glClear(GL_COLOR_BUFFER_BIT); - - if (!HasDisplayTexture()) - return; - - RenderDisplay(left, GetWindowHeight() - top - height, width, height, static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, IsUsingLinearFiltering()); -} - -static void DrawFullscreenQuadES2(s32 tex_view_x, s32 tex_view_y, s32 tex_view_width, s32 tex_view_height, - s32 tex_width, s32 tex_height) -{ - const float tex_left = static_cast(tex_view_x) / static_cast(tex_width); - const float tex_right = tex_left + static_cast(tex_view_width) / static_cast(tex_width); - const float tex_top = static_cast(tex_view_y) / static_cast(tex_height); - const float tex_bottom = tex_top + static_cast(tex_view_height) / static_cast(tex_height); - const std::array, 4> vertices = {{ - {{-1.0f, -1.0f, tex_left, tex_bottom}}, // bottom-left - {{1.0f, -1.0f, tex_right, tex_bottom}}, // bottom-right - {{-1.0f, 1.0f, tex_left, tex_top}}, // top-left - {{1.0f, 1.0f, tex_right, tex_top}}, // top-right - }}; - - glBindBuffer(GL_ARRAY_BUFFER, 0); - glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]), &vertices[0][0]); - glEnableVertexAttribArray(0); - glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]), &vertices[0][2]); - glEnableVertexAttribArray(1); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glDisableVertexAttribArray(1); - glDisableVertexAttribArray(0); -} - -void OpenGLHostDisplay::RenderDisplay(s32 left, s32 bottom, s32 width, s32 height, GL::Texture* texture, - s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, bool 
linear_filter) -{ - glViewport(left, bottom, width, height); - glDisable(GL_BLEND); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); - m_display_program.Bind(); - texture->Bind(); - - const bool linear = IsUsingLinearFiltering(); - - if (!m_use_gles2_draw_path) - { - const float position_adjust = linear ? 0.5f : 0.0f; - const float size_adjust = linear ? 1.0f : 0.0f; - const float flip_adjust = (texture_view_height < 0) ? -1.0f : 1.0f; - m_display_program.Uniform4f( - 0, (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_y) + (position_adjust * flip_adjust)) / static_cast(texture->GetHeight()), - (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_height) - (size_adjust * flip_adjust)) / - static_cast(texture->GetHeight())); - glBindSampler(0, linear_filter ? m_display_linear_sampler : m_display_nearest_sampler); - glBindVertexArray(m_display_vao); - glDrawArrays(GL_TRIANGLES, 0, 3); - glBindSampler(0, 0); - } - else - { - texture->SetLinearFilter(linear_filter); - - DrawFullscreenQuadES2(m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, texture->GetWidth(), texture->GetHeight()); - } -} - -void OpenGLHostDisplay::RenderSoftwareCursor() -{ - if (!HasSoftwareCursor()) - return; - - const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); - RenderSoftwareCursor(left, GetWindowHeight() - top - height, width, height, m_cursor_texture.get()); -} - -void OpenGLHostDisplay::RenderSoftwareCursor(s32 left, s32 bottom, s32 width, s32 height, GPUTexture* texture_handle) -{ - glViewport(left, bottom, width, height); - glEnable(GL_BLEND); - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO); - glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); - 
m_cursor_program.Bind(); - static_cast(texture_handle)->Bind(); - - if (!m_use_gles2_draw_path) - { - m_cursor_program.Uniform4f(0, 0.0f, 0.0f, 1.0f, 1.0f); - glBindSampler(0, m_display_linear_sampler); - glBindVertexArray(m_display_vao); - glDrawArrays(GL_TRIANGLES, 0, 3); - glBindSampler(0, 0); - } - else - { - const s32 tex_width = static_cast(texture_handle->GetWidth()); - const s32 tex_height = static_cast(texture_handle->GetHeight()); - DrawFullscreenQuadES2(0, 0, tex_width, tex_height, tex_width, tex_height); - } -} - -bool OpenGLHostDisplay::SetPostProcessingChain(const std::string_view& config) -{ - if (config.empty()) - { - m_post_processing_input_texture.Destroy(); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return true; - } - - if (!m_post_processing_chain.CreateFromString(config)) - return false; - - m_post_processing_stages.clear(); - - FrontendCommon::PostProcessingShaderGen shadergen(GetRenderAPI(), false); - - for (u32 i = 0; i < m_post_processing_chain.GetStageCount(); i++) - { - const FrontendCommon::PostProcessingShader& shader = m_post_processing_chain.GetShaderStage(i); - const std::string vs = shadergen.GeneratePostProcessingVertexShader(shader); - const std::string ps = shadergen.GeneratePostProcessingFragmentShader(shader); - - PostProcessingStage stage; - stage.uniforms_size = shader.GetUniformsSize(); - if (!stage.program.Compile(vs, ps)) - { - Log_InfoPrintf("Failed to compile post-processing program, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - if (!shadergen.UseGLSLBindingLayout()) - { - stage.program.BindUniformBlock("UBOBlock", 1); - stage.program.Bind(); - stage.program.Uniform1i("samp0", 0); - } - - if (!stage.program.Link()) - { - Log_InfoPrintf("Failed to link post-processing program, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - 
m_post_processing_stages.push_back(std::move(stage)); - } - - if (!m_post_processing_ubo) - { - m_post_processing_ubo = GL::StreamBuffer::Create(GL_UNIFORM_BUFFER, 1 * 1024 * 1024); - if (!m_post_processing_ubo) - { - Log_InfoPrintf("Failed to allocate uniform buffer for postprocessing"); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - m_post_processing_ubo->Unbind(); - } - - m_post_processing_timer.Reset(); - return true; -} - -bool OpenGLHostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 target_height) -{ - DebugAssert(!m_post_processing_stages.empty()); - - if (m_post_processing_input_texture.GetWidth() != target_width || - m_post_processing_input_texture.GetHeight() != target_height) - { - if (!m_post_processing_input_texture.Create(target_width, target_height, 1, 1, 1, GPUTexture::Format::RGBA8) || - !m_post_processing_input_texture.CreateFramebuffer()) - { - return false; - } - } - - const u32 target_count = (static_cast(m_post_processing_stages.size()) - 1); - for (u32 i = 0; i < target_count; i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) - { - if (!pps.output_texture.Create(target_width, target_height, 1, 1, 1, GPUTexture::Format::RGBA8) || - !pps.output_texture.CreateFramebuffer()) - { - return false; - } - } - } - - return true; -} - -void OpenGLHostDisplay::ApplyPostProcessingChain(GLuint final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, GL::Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, - u32 target_width, u32 target_height) -{ - if (!CheckPostProcessingRenderTargets(target_width, target_height)) - { - RenderDisplay(final_left, target_height - final_top - final_height, final_width, final_height, texture, - texture_view_x, texture_view_y, texture_view_width, 
texture_view_height, IsUsingLinearFiltering()); - return; - } - - // downsample/upsample - use same viewport for remainder - m_post_processing_input_texture.BindFramebuffer(GL_FRAMEBUFFER); - glClear(GL_COLOR_BUFFER_BIT); - RenderDisplay(final_left, target_height - final_top - final_height, final_width, final_height, texture, - texture_view_x, texture_view_y, texture_view_width, texture_view_height, IsUsingLinearFiltering()); - - const s32 orig_texture_width = texture_view_width; - const s32 orig_texture_height = texture_view_height; - texture = &m_post_processing_input_texture; - texture_view_x = final_left; - texture_view_y = final_top; - texture_view_width = final_width; - texture_view_height = final_height; - - m_post_processing_ubo->Bind(); - - const u32 final_stage = static_cast(m_post_processing_stages.size()) - 1u; - for (u32 i = 0; i < static_cast(m_post_processing_stages.size()); i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - glBindFramebuffer(GL_FRAMEBUFFER, (i == final_stage) ? 
final_target : pps.output_texture.GetGLFramebufferID()); - glClear(GL_COLOR_BUFFER_BIT); - - pps.program.Bind(); - - static_cast(texture)->Bind(); - glBindSampler(0, m_display_border_sampler); - - const auto map_result = m_post_processing_ubo->Map(m_uniform_buffer_alignment, pps.uniforms_size); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - map_result.pointer, texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, - texture_view_height, GetWindowWidth(), GetWindowHeight(), orig_texture_width, orig_texture_height, - static_cast(m_post_processing_timer.GetTimeSeconds())); - m_post_processing_ubo->Unmap(pps.uniforms_size); - glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_post_processing_ubo->GetGLBufferId(), map_result.buffer_offset, - pps.uniforms_size); - - glDrawArrays(GL_TRIANGLES, 0, 3); - - if (i != final_stage) - texture = &pps.output_texture; - } - - glBindSampler(0, 0); - m_post_processing_ubo->Unbind(); -} - -void OpenGLHostDisplay::CreateTimestampQueries() -{ - const bool gles = m_gl_context->IsGLES(); - const auto GenQueries = gles ? glGenQueriesEXT : glGenQueries; - - GenQueries(static_cast(m_timestamp_queries.size()), m_timestamp_queries.data()); - KickTimestampQuery(); -} - -void OpenGLHostDisplay::DestroyTimestampQueries() -{ - if (m_timestamp_queries[0] == 0) - return; - - const bool gles = m_gl_context->IsGLES(); - const auto DeleteQueries = gles ? glDeleteQueriesEXT : glDeleteQueries; - - if (m_timestamp_query_started) - { - const auto EndQuery = gles ? 
glEndQueryEXT : glEndQuery; - EndQuery(GL_TIME_ELAPSED); - } - - DeleteQueries(static_cast(m_timestamp_queries.size()), m_timestamp_queries.data()); - m_timestamp_queries.fill(0); - m_read_timestamp_query = 0; - m_write_timestamp_query = 0; - m_waiting_timestamp_queries = 0; - m_timestamp_query_started = false; -} - -void OpenGLHostDisplay::PopTimestampQuery() -{ - const bool gles = m_gl_context->IsGLES(); - - if (gles) - { - GLint disjoint = 0; - glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); - if (disjoint) - { - Log_VerbosePrintf("GPU timing disjoint, resetting."); - if (m_timestamp_query_started) - glEndQueryEXT(GL_TIME_ELAPSED); - - m_read_timestamp_query = 0; - m_write_timestamp_query = 0; - m_waiting_timestamp_queries = 0; - m_timestamp_query_started = false; - } - } - - while (m_waiting_timestamp_queries > 0) - { - const auto GetQueryObjectiv = gles ? glGetQueryObjectivEXT : glGetQueryObjectiv; - const auto GetQueryObjectui64v = gles ? glGetQueryObjectui64vEXT : glGetQueryObjectui64v; - - GLint available = 0; - GetQueryObjectiv(m_timestamp_queries[m_read_timestamp_query], GL_QUERY_RESULT_AVAILABLE, &available); - if (!available) - break; - - u64 result = 0; - GetQueryObjectui64v(m_timestamp_queries[m_read_timestamp_query], GL_QUERY_RESULT, &result); - m_accumulated_gpu_time += static_cast(static_cast(result) / 1000000.0); - m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; - m_waiting_timestamp_queries--; - } - - if (m_timestamp_query_started) - { - const auto EndQuery = gles ? glEndQueryEXT : glEndQuery; - EndQuery(GL_TIME_ELAPSED); - - m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; - m_timestamp_query_started = false; - m_waiting_timestamp_queries++; - } -} - -void OpenGLHostDisplay::KickTimestampQuery() -{ - if (m_timestamp_query_started || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) - return; - - const bool gles = m_gl_context->IsGLES(); - const auto BeginQuery = gles ? 
glBeginQueryEXT : glBeginQuery; - - BeginQuery(GL_TIME_ELAPSED, m_timestamp_queries[m_write_timestamp_query]); - m_timestamp_query_started = true; -} - -bool OpenGLHostDisplay::SetGPUTimingEnabled(bool enabled) -{ - if (m_gpu_timing_enabled == enabled) - return true; - - if (enabled && m_gl_context->IsGLES() && - (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)) - { - return false; - } - - m_gpu_timing_enabled = enabled; - if (m_gpu_timing_enabled) - CreateTimestampQueries(); - else - DestroyTimestampQueries(); - - return true; -} - -float OpenGLHostDisplay::GetAndResetAccumulatedGPUTime() -{ - const float value = m_accumulated_gpu_time; - m_accumulated_gpu_time = 0.0f; - return value; -} - -GL::StreamBuffer* OpenGLHostDisplay::GetTextureStreamBuffer() -{ - if (m_use_gles2_draw_path || m_texture_stream_buffer) - return m_texture_stream_buffer.get(); - - m_texture_stream_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_STREAM_BUFFER_SIZE); - return m_texture_stream_buffer.get(); -} diff --git a/src/util/opengl_host_display.h b/src/util/opengl_host_display.h deleted file mode 100644 index 9302b4fbd..000000000 --- a/src/util/opengl_host_display.h +++ /dev/null @@ -1,140 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/gl/context.h" -#include "common/gl/loader.h" -#include "common/gl/program.h" -#include "common/gl/stream_buffer.h" -#include "common/gl/texture.h" -#include "common/timer.h" -#include "common/window_info.h" -#include "host_display.h" -#include "postprocessing_chain.h" -#include - -class OpenGLHostDisplay final : public HostDisplay -{ -public: - OpenGLHostDisplay(); - ~OpenGLHostDisplay(); - - RenderAPI GetRenderAPI() const override; - void* GetDevice() const override; - void* GetContext() const override; - - bool HasDevice() const override; - bool HasSurface() const override; - - bool 
CreateDevice(const WindowInfo& wi, bool vsync) override; - bool SetupDevice() override; - - bool MakeCurrent() override; - bool DoneCurrent() override; - - bool ChangeWindow(const WindowInfo& new_wi) override; - void ResizeWindow(s32 new_window_width, s32 new_window_height) override; - bool SupportsFullscreen() const override; - bool IsFullscreen() override; - bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; - AdapterAndModeList GetAdapterAndModeList() override; - void DestroySurface() override; - - bool SetPostProcessingChain(const std::string_view& config) override; - - std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, u32 data_stride, - bool dynamic = false) override; - bool BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) override; - void EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) override; - bool UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; - bool SupportsTextureFormat(GPUTexture::Format format) const override; - - void SetVSync(bool enabled) override; - - bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; - - bool SetGPUTimingEnabled(bool enabled) override; - float GetAndResetAccumulatedGPUTime() override; - - ALWAYS_INLINE GL::Context* GetGLContext() const { return m_gl_context.get(); } - ALWAYS_INLINE bool UsePBOForUploads() const { return m_use_pbo_for_pixels; } - ALWAYS_INLINE bool UseGLES3DrawPath() const { return m_use_gles2_draw_path; } - ALWAYS_INLINE std::vector& GetTextureRepackBuffer() { return 
m_texture_repack_buffer; } - - GL::StreamBuffer* GetTextureStreamBuffer(); - -protected: - static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; - - const char* GetGLSLVersionString() const; - std::string GetGLSLVersionHeader() const; - - bool CreateResources() override; - void DestroyResources() override; - - bool CreateImGuiContext() override; - void DestroyImGuiContext() override; - bool UpdateImGuiFontTexture() override; - - void SetSwapInterval(); - - void RenderDisplay(); - void RenderImGui(); - void RenderSoftwareCursor(); - - void RenderDisplay(s32 left, s32 bottom, s32 width, s32 height, GL::Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter); - void RenderSoftwareCursor(s32 left, s32 bottom, s32 width, s32 height, GPUTexture* texture_handle); - - struct PostProcessingStage - { - GL::Program program; - GL::Texture output_texture; - u32 uniforms_size; - }; - - bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height); - void ApplyPostProcessingChain(GLuint final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, - GL::Texture* texture, s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, u32 target_width, u32 target_height); - - void CreateTimestampQueries(); - void DestroyTimestampQueries(); - void PopTimestampQuery(); - void KickTimestampQuery(); - - std::unique_ptr m_gl_context; - - GL::Program m_display_program; - GL::Program m_cursor_program; - GLuint m_display_vao = 0; - GLuint m_display_nearest_sampler = 0; - GLuint m_display_linear_sampler = 0; - GLuint m_display_border_sampler = 0; - GLuint m_uniform_buffer_alignment = 1; - - std::unique_ptr m_texture_stream_buffer; - std::vector m_texture_repack_buffer; - u32 m_texture_stream_buffer_offset = 0; - - FrontendCommon::PostProcessingChain m_post_processing_chain; - GL::Texture m_post_processing_input_texture; - std::unique_ptr m_post_processing_ubo; - 
std::vector m_post_processing_stages; - Common::Timer m_post_processing_timer; - - std::array m_timestamp_queries = {}; - float m_accumulated_gpu_time = 0.0f; - u8 m_read_timestamp_query = 0; - u8 m_write_timestamp_query = 0; - u8 m_waiting_timestamp_queries = 0; - bool m_timestamp_query_started = false; - - bool m_use_gles2_draw_path = false; - bool m_use_pbo_for_pixels = false; -}; diff --git a/src/common/gl/loader.h b/src/util/opengl_loader.h similarity index 79% rename from src/common/gl/loader.h rename to src/util/opengl_loader.h index c7f1479ea..5cdaf1a25 100644 --- a/src/common/gl/loader.h +++ b/src/util/opengl_loader.h @@ -5,7 +5,7 @@ // Fix glad.h including windows.h #ifdef _WIN32 -#include "../windows_headers.h" +#include "common/windows_headers.h" #endif #include "glad.h" \ No newline at end of file diff --git a/src/util/opengl_pipeline.cpp b/src/util/opengl_pipeline.cpp new file mode 100644 index 000000000..f9f1f993c --- /dev/null +++ b/src/util/opengl_pipeline.cpp @@ -0,0 +1,921 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "opengl_pipeline.h" +#include "opengl_device.h" +#include "opengl_stream_buffer.h" +#include "shadergen.h" + +#include "common/assert.h" +#include "common/file_system.h" +#include "common/hash_combine.h" +#include "common/log.h" +#include "common/path.h" +#include "common/scoped_guard.h" +#include "common/string.h" +#include "common/string_util.h" + +#include "fmt/format.h" +#include "zstd.h" +#include "zstd_errors.h" + +#include + +Log_SetChannel(OpenGLPipeline); + +struct PipelineDiskCacheFooter +{ + u32 version; + u32 num_programs; + char driver_vendor[128]; + char driver_renderer[128]; + char driver_version[128]; +}; +static_assert(sizeof(PipelineDiskCacheFooter) == (sizeof(u32) * 2 + 128 * 3)); + +struct PipelineDiskCacheIndexEntry +{ + OpenGLPipeline::ProgramCacheKey key; + u32 format; + u32 offset; + u32 uncompressed_size; + u32 
compressed_size; +}; +static_assert(sizeof(PipelineDiskCacheIndexEntry) == 128); // No padding + +static unsigned s_next_bad_shader_id = 1; + +static GLenum GetGLShaderType(GPUShaderStage stage) +{ + static constexpr std::array(GPUShaderStage::MaxCount)> mapping = {{ + GL_VERTEX_SHADER, // Vertex + GL_FRAGMENT_SHADER, // Fragment + GL_COMPUTE_SHADER, // Compute + }}; + + return mapping[static_cast(stage)]; +} + +static void FillFooter(PipelineDiskCacheFooter* footer, u32 version) +{ + footer->version = version; + footer->num_programs = 0; + StringUtil::Strlcpy(footer->driver_vendor, reinterpret_cast(glGetString(GL_VENDOR)), + std::size(footer->driver_vendor)); + StringUtil::Strlcpy(footer->driver_renderer, reinterpret_cast(glGetString(GL_RENDERER)), + std::size(footer->driver_renderer)); + StringUtil::Strlcpy(footer->driver_version, reinterpret_cast(glGetString(GL_VERSION)), + std::size(footer->driver_version)); +} + +OpenGLShader::OpenGLShader(GPUShaderStage stage, GLuint id, const GPUShaderCache::CacheIndexKey& key) + : GPUShader(stage), m_id(id), m_key(key) +{ +} + +OpenGLShader::~OpenGLShader() = default; + +void OpenGLShader::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + glObjectLabel(GL_SHADER, m_id, static_cast(name.length()), static_cast(name.data())); +#endif +} + +std::unique_ptr OpenGLDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) +{ + // Not supported.. except spir-v maybe? but no point really... 
+ return {}; +} + +std::unique_ptr OpenGLDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, + DynamicHeapArray* out_binary) +{ + if (std::strcmp(entry_point, "main") != 0) + { + Log_ErrorPrintf("Entry point must be 'main', but got '%s' instead.", entry_point); + return {}; + } + + glGetError(); + + GLuint shader = glCreateShader(GetGLShaderType(stage)); + if (GLenum err = glGetError(); err != GL_NO_ERROR) + { + Log_ErrorPrintf("glCreateShader() failed: %u", err); + return {}; + } + + const GLchar* string = source.data(); + const GLint length = static_cast(source.length()); + glShaderSource(shader, 1, &string, &length); + glCompileShader(shader); + + GLint status = GL_FALSE; + glGetShaderiv(shader, GL_COMPILE_STATUS, &status); + + GLint info_log_length = 0; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &info_log_length); + + if (status == GL_FALSE || info_log_length > 0) + { + std::string info_log; + info_log.resize(info_log_length + 1); + glGetShaderInfoLog(shader, info_log_length, &info_log_length, &info_log[0]); + + if (status == GL_TRUE) + { + Log_ErrorPrintf("Shader compiled with warnings:\n%s", info_log.c_str()); + } + else + { + Log_ErrorPrintf("Shader failed to compile:\n%s", info_log.c_str()); + + auto fp = FileSystem::OpenManagedCFile( + GetShaderDumpPath(fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); + if (fp) + { + std::fwrite(source.data(), source.size(), 1, fp.get()); + std::fprintf(fp.get(), "\n\nCompile %s shader failed\n", GPUShader::GetStageName(stage)); + std::fwrite(info_log.c_str(), info_log_length, 1, fp.get()); + } + + glDeleteShader(shader); + return {}; + } + } + + return std::unique_ptr( + new OpenGLShader(stage, shader, GPUShaderCache::GetCacheKey(stage, source, "main"))); +} + +////////////////////////////////////////////////////////////////////////// + +bool OpenGLPipeline::VertexArrayCacheKey::operator==(const VertexArrayCacheKey& rhs) const +{ + 
return (std::memcmp(this, &rhs, sizeof(*this)) == 0); +} + +bool OpenGLPipeline::VertexArrayCacheKey::operator!=(const VertexArrayCacheKey& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) != 0); +} + +size_t OpenGLPipeline::VertexArrayCacheKeyHash::operator()(const VertexArrayCacheKey& k) const +{ + std::size_t h = 0; + hash_combine(h, k.num_vertex_attributes, k.vertex_attribute_stride); + for (const VertexAttribute& va : k.vertex_attributes) + hash_combine(h, va.key); + return h; +} + +bool OpenGLPipeline::ProgramCacheKey::operator==(const ProgramCacheKey& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) == 0); +} + +bool OpenGLPipeline::ProgramCacheKey::operator!=(const ProgramCacheKey& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) != 0); +} + +size_t OpenGLPipeline::ProgramCacheKeyHash::operator()(const ProgramCacheKey& k) const +{ + // TODO: maybe use xxhash here... + std::size_t h = 0; + hash_combine(h, k.vs_key.entry_point_low, k.vs_key.entry_point_high, k.vs_key.source_hash_low, + k.vs_key.source_hash_high, k.vs_key.source_length, k.vs_key.shader_type); + hash_combine(h, k.fs_key.entry_point_low, k.fs_key.entry_point_high, k.fs_key.source_hash_low, + k.fs_key.source_hash_high, k.fs_key.source_length, k.fs_key.shader_type); + hash_combine(h, k.va_key.num_vertex_attributes, k.va_key.vertex_attribute_stride); + for (const VertexAttribute& va : k.va_key.vertex_attributes) + hash_combine(h, va.key); + return h; +} + +OpenGLPipeline::ProgramCacheKey OpenGLPipeline::GetProgramCacheKey(const GraphicsConfig& plconfig) +{ + Assert(plconfig.input_layout.vertex_attributes.size() <= MAX_VERTEX_ATTRIBUTES); + + ProgramCacheKey ret; + ret.vs_key = static_cast(plconfig.vertex_shader)->GetKey(); + ret.fs_key = static_cast(plconfig.fragment_shader)->GetKey(); + + std::memset(ret.va_key.vertex_attributes, 0, sizeof(ret.va_key.vertex_attributes)); + ret.va_key.vertex_attribute_stride = 0; + ret.va_key.num_vertex_attributes = 
static_cast(plconfig.input_layout.vertex_attributes.size()); + + if (ret.va_key.num_vertex_attributes > 0) + { + std::memcpy(ret.va_key.vertex_attributes, plconfig.input_layout.vertex_attributes.data(), + sizeof(VertexAttribute) * ret.va_key.num_vertex_attributes); + ret.va_key.vertex_attribute_stride = plconfig.input_layout.vertex_stride; + } + + return ret; +} + +GLuint OpenGLDevice::LookupProgramCache(const OpenGLPipeline::ProgramCacheKey& key, + const GPUPipeline::GraphicsConfig& plconfig) +{ + auto it = m_program_cache.find(key); + if (it != m_program_cache.end() && it->second.program_id == 0 && it->second.file_uncompressed_size > 0) + { + it->second.program_id = CreateProgramFromPipelineCache(it->second, plconfig); + if (it->second.program_id == 0) + { + Log_ErrorPrintf("Failed to create program from binary."); + m_program_cache.erase(it); + it = m_program_cache.end(); + DiscardPipelineCache(); + } + } + + if (it != m_program_cache.end()) + { + if (it->second.program_id != 0) + it->second.reference_count++; + + return it->second.program_id; + } + + OpenGLPipeline::ProgramCacheItem item; + item.program_id = CompileProgram(plconfig); + item.reference_count = 0; + item.file_format = 0; + item.file_offset = 0; + item.file_uncompressed_size = 0; + item.file_compressed_size = 0; + if (item.program_id != 0) + { + AddToPipelineCache(&item); + item.reference_count++; + } + + // Insert into cache even if we failed, so we don't compile it again, but don't increment reference count. 
+ m_program_cache.emplace(key, item); + return item.program_id; +} + +GLuint OpenGLDevice::CompileProgram(const GPUPipeline::GraphicsConfig& plconfig) +{ + glGetError(); + const GLuint program_id = glCreateProgram(); + if (glGetError() != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create program object."); + return 0; + } + + if (m_pipeline_disk_cache_file) + glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); + + Assert(plconfig.vertex_shader && plconfig.fragment_shader); + glAttachShader(program_id, static_cast(plconfig.vertex_shader)->GetGLId()); + glAttachShader(program_id, static_cast(plconfig.fragment_shader)->GetGLId()); + + if (!ShaderGen::UseGLSLBindingLayout()) + { + static constexpr std::array(GPUPipeline::VertexAttribute::Semantic::MaxCount)> + semantic_vars = {{ + "a_pos", // Position + "a_tex", // TexCoord + "a_col", // Color + }}; + + for (u32 i = 0; i < static_cast(plconfig.input_layout.vertex_attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = plconfig.input_layout.vertex_attributes[i]; + if (va.semantic == GPUPipeline::VertexAttribute::Semantic::Position && va.semantic_index == 0) + { + glBindAttribLocation(program_id, i, "a_pos"); + } + else + { + glBindAttribLocation( + program_id, i, + TinyString::FromFmt("{}{}", semantic_vars[static_cast(va.semantic.GetValue())], va.semantic_index)); + } + } + + glBindFragDataLocation(program_id, 0, "o_col0"); + + if (m_features.dual_source_blend) + { + if (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended) + glBindFragDataLocationIndexed(program_id, 1, 0, "o_col1"); + else if (GLAD_GL_EXT_blend_func_extended) + glBindFragDataLocationIndexedEXT(program_id, 1, 0, "o_col1"); + } + } + + glLinkProgram(program_id); + + GLint status = GL_FALSE; + glGetProgramiv(program_id, GL_LINK_STATUS, &status); + + GLint info_log_length = 0; + glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (status == GL_FALSE || info_log_length > 0) + { + 
std::string info_log; + info_log.resize(info_log_length + 1); + glGetProgramInfoLog(program_id, info_log_length, &info_log_length, &info_log[0]); + + if (status == GL_TRUE) + { + Log_ErrorPrintf("Program linked with warnings:\n%s", info_log.c_str()); + } + else + { + Log_ErrorPrintf("Program failed to link:\n%s", info_log.c_str()); + glDeleteProgram(program_id); + return 0; + } + } + + PostLinkProgram(plconfig, program_id); + + return program_id; +} + +void OpenGLDevice::PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig, GLuint program_id) +{ + if (!ShaderGen::UseGLSLBindingLayout()) + { + GLint location = glGetUniformBlockIndex(program_id, "UBOBlock"); + if (location >= 0) + glUniformBlockBinding(program_id, location, 1); + + glUseProgram(program_id); + + // Texture buffer is zero here, so we have to bump it. + const u32 num_textures = std::max(GetActiveTexturesForLayout(plconfig.layout), 1); + for (u32 i = 0; i < num_textures; i++) + { + location = glGetUniformLocation(program_id, TinyString::FromFmt("samp{}", i)); + if (location >= 0) + glUniform1i(location, i); + } + + glUseProgram(m_current_pipeline ? 
m_current_pipeline->GetProgram() : 0); + } +} + +void OpenGLDevice::UnrefProgram(const OpenGLPipeline::ProgramCacheKey& key) +{ + auto it = m_program_cache.find(key); + Assert(it != m_program_cache.end() && it->second.program_id != 0 && it->second.reference_count > 0); + + if ((--it->second.reference_count) > 0) + return; + + if (m_last_program == it->second.program_id) + { + m_last_program = 0; + glUseProgram(0); + } + + glDeleteProgram(it->second.program_id); + it->second.program_id = 0; +} + +GLuint OpenGLDevice::LookupVAOCache(const OpenGLPipeline::VertexArrayCacheKey& key) +{ + auto it = m_vao_cache.find(key); + if (it != m_vao_cache.end()) + { + it->second.reference_count++; + return it->second.vao_id; + } + + OpenGLPipeline::VertexArrayCacheItem item; + item.vao_id = + CreateVAO(gsl::span(key.vertex_attributes, key.num_vertex_attributes), + key.vertex_attribute_stride); + if (item.vao_id == 0) + return 0; + + item.reference_count = 1; + m_vao_cache.emplace(key, item); + return item.vao_id; +} + +GLuint OpenGLDevice::CreateVAO(gsl::span attributes, u32 stride) +{ + glGetError(); + GLuint vao; + glGenVertexArrays(1, &vao); + if (const GLenum err = glGetError(); err != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create vertex array object: %u", vao); + return 0; + } + + glBindVertexArray(vao); + m_vertex_buffer->Bind(); + m_index_buffer->Bind(); + + struct VAMapping + { + GLenum type; + GLboolean normalized; + GLboolean integer; + }; + static constexpr const std::array(GPUPipeline::VertexAttribute::Type::MaxCount)> + format_mapping = {{ + {GL_FLOAT, GL_FALSE, GL_FALSE}, // Float + {GL_UNSIGNED_BYTE, GL_FALSE, GL_TRUE}, // UInt8 + {GL_BYTE, GL_FALSE, GL_TRUE}, // SInt8 + {GL_UNSIGNED_BYTE, GL_TRUE, GL_FALSE}, // UNorm8 + {GL_UNSIGNED_SHORT, GL_FALSE, GL_TRUE}, // UInt16 + {GL_SHORT, GL_FALSE, GL_TRUE}, // SInt16 + {GL_UNSIGNED_SHORT, GL_TRUE, GL_FALSE}, // UNorm16 + {GL_UNSIGNED_INT, GL_FALSE, GL_TRUE}, // UInt32 + {GL_INT, GL_FALSE, GL_TRUE}, // SInt32 + }}; 
+ + for (u32 i = 0; i < static_cast(attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = attributes[i]; + const VAMapping& m = format_mapping[static_cast(va.type.GetValue())]; + const void* ptr = reinterpret_cast(static_cast(va.offset.GetValue())); + glEnableVertexAttribArray(i); + if (m.integer) + glVertexAttribIPointer(i, va.components, m.type, stride, ptr); + else + glVertexAttribPointer(i, va.components, m.type, m.normalized, stride, ptr); + } + + glBindVertexArray(m_last_vao); + + return vao; +} + +void OpenGLDevice::UnrefVAO(const OpenGLPipeline::VertexArrayCacheKey& key) +{ + auto it = m_vao_cache.find(key); + Assert(it != m_vao_cache.end() && it->second.reference_count > 0); + + if ((--it->second.reference_count) > 0) + return; + + if (m_last_vao == it->second.vao_id) + { + m_last_vao = 0; + glBindVertexArray(0); + } + + glDeleteVertexArrays(1, &it->second.vao_id); + m_vao_cache.erase(it); +} + +OpenGLPipeline::OpenGLPipeline(const ProgramCacheKey& key, GLuint program, GLuint vao, const RasterizationState& rs, + const DepthState& ds, const BlendState& bs, GLenum topology) + : m_key(key), m_program(program), m_vao(vao), m_blend_state(bs), m_rasterization_state(rs), m_depth_state(ds), + m_topology(topology) +{ +} + +OpenGLPipeline::~OpenGLPipeline() +{ + OpenGLDevice& dev = OpenGLDevice::GetInstance(); + dev.UnbindPipeline(this); + dev.UnrefProgram(m_key); + dev.UnrefVAO(m_key.va_key); +} + +void OpenGLPipeline::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + glObjectLabel(GL_PROGRAM, m_program, static_cast(name.length()), name.data()); +#endif +} + +std::unique_ptr OpenGLDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config) +{ + const OpenGLPipeline::ProgramCacheKey pkey = OpenGLPipeline::GetProgramCacheKey(config); + + const GLuint program_id = LookupProgramCache(pkey, config); + if (program_id == 0) + return {}; + + const GLuint vao_id = LookupVAOCache(pkey.va_key); + if (vao_id == 0) + { + 
UnrefProgram(pkey); + return {}; + } + + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = {{ + GL_POINTS, // Points + GL_LINES, // Lines + GL_TRIANGLES, // Triangles + GL_TRIANGLE_STRIP, // TriangleStrips + }}; + + return std::unique_ptr(new OpenGLPipeline(pkey, program_id, vao_id, config.rasterization, config.depth, + config.blend, primitives[static_cast(config.primitive)])); +} + +ALWAYS_INLINE static void ApplyRasterizationState(const GPUPipeline::RasterizationState& rs) +{ + if (rs.cull_mode == GPUPipeline::CullMode::None) + { + glDisable(GL_CULL_FACE); + } + else + { + glEnable(GL_CULL_FACE); + glCullFace((rs.cull_mode == GPUPipeline::CullMode::Front) ? GL_FRONT : GL_BACK); + } + + // TODO: always enabled, should be done at init time + glEnable(GL_SCISSOR_TEST); +} + +ALWAYS_INLINE static void ApplyDepthState(const GPUPipeline::DepthState& ds) +{ + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = {{ + GL_NEVER, // Never + GL_ALWAYS, // Always + GL_LESS, // Less + GL_LEQUAL, // LessEqual + GL_GREATER, // Greater + GL_GEQUAL, // GreaterEqual + GL_EQUAL, // Equal + }}; + + (ds.depth_test != GPUPipeline::DepthFunc::Never) ? 
glEnable(GL_DEPTH_TEST) : glDisable(GL_DEPTH_TEST); + glDepthFunc(func_mapping[static_cast(ds.depth_test.GetValue())]); + glDepthMask(ds.depth_write); +} + +ALWAYS_INLINE static void ApplyBlendState(const GPUPipeline::BlendState& bs) +{ + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + GL_ZERO, // Zero + GL_ONE, // One + GL_SRC_COLOR, // SrcColor + GL_ONE_MINUS_SRC_COLOR, // InvSrcColor + GL_DST_COLOR, // DstColor + GL_ONE_MINUS_DST_COLOR, // InvDstColor + GL_SRC_ALPHA, // SrcAlpha + GL_ONE_MINUS_SRC_ALPHA, // InvSrcAlpha + GL_SRC1_ALPHA, // SrcAlpha1 + GL_ONE_MINUS_SRC1_ALPHA, // InvSrcAlpha1 + GL_DST_ALPHA, // DstAlpha + GL_ONE_MINUS_DST_ALPHA, // InvDstAlpha + GL_CONSTANT_COLOR, // ConstantColor + GL_ONE_MINUS_CONSTANT_COLOR, // InvConstantColor + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + GL_FUNC_ADD, // Add + GL_FUNC_SUBTRACT, // Subtract + GL_FUNC_REVERSE_SUBTRACT, // ReverseSubtract + GL_MIN, // Min + GL_MAX, // Max + }}; + + // TODO: driver bugs + // TODO: rdoc and look for redundant calls + + bs.enable ? glEnable(GL_BLEND) : glDisable(GL_BLEND); + + if (bs.enable) + { + glBlendFuncSeparate(blend_mapping[static_cast(bs.src_blend.GetValue())], + blend_mapping[static_cast(bs.dst_blend.GetValue())], + blend_mapping[static_cast(bs.src_alpha_blend.GetValue())], + blend_mapping[static_cast(bs.dst_alpha_blend.GetValue())]); + glBlendEquationSeparate(op_mapping[static_cast(bs.blend_op.GetValue())], + op_mapping[static_cast(bs.alpha_blend_op.GetValue())]); + + // TODO: cache this to avoid calls? 
+ glBlendColor(bs.GetConstantRed(), bs.GetConstantGreen(), bs.GetConstantBlue(), bs.GetConstantAlpha()); + } + + glColorMask(bs.write_r, bs.write_g, bs.write_b, bs.write_a); +} + +void OpenGLDevice::SetPipeline(GPUPipeline* pipeline) +{ + if (m_current_pipeline == pipeline) + return; + + OpenGLPipeline* const P = static_cast(pipeline); + m_current_pipeline = P; + + if (m_last_rasterization_state != P->GetRasterizationState()) + { + m_last_rasterization_state = P->GetRasterizationState(); + ApplyRasterizationState(m_last_rasterization_state); + } + if (m_last_depth_state != P->GetDepthState()) + { + m_last_depth_state = P->GetDepthState(); + ApplyDepthState(m_last_depth_state); + } + if (m_last_blend_state != P->GetBlendState()) + { + m_last_blend_state = P->GetBlendState(); + ApplyBlendState(m_last_blend_state); + } + if (m_last_vao != P->GetVAO()) + { + m_last_vao = P->GetVAO(); + glBindVertexArray(m_last_vao); + } + if (m_last_program != P->GetProgram()) + { + m_last_program = P->GetProgram(); + glUseProgram(m_last_program); + } +} + +bool OpenGLDevice::ReadPipelineCache(const std::string& filename) +{ + DebugAssert(!m_pipeline_disk_cache_file); + + m_pipeline_disk_cache_file = FileSystem::OpenCFile(filename.c_str(), "r+b"); + m_pipeline_disk_cache_filename = filename; + + if (!m_pipeline_disk_cache_file) + { + // Multiple instances running? Ignore. + if (errno == EACCES) + { + m_pipeline_disk_cache_filename = {}; + return true; + } + + // If it doesn't exist, we're going to create it. + if (errno != ENOENT) + { + Log_WarningPrintf("Failed to open shader cache: %d", errno); + m_pipeline_disk_cache_filename = {}; + return false; + } + + Log_WarningPrintf("Disk cache does not exist, creating."); + return DiscardPipelineCache(); + } + + // Read footer. 
+ const s64 size = FileSystem::FSize64(m_pipeline_disk_cache_file); + if (size < sizeof(PipelineDiskCacheFooter) || size >= static_cast(std::numeric_limits::max())) + return DiscardPipelineCache(); + + PipelineDiskCacheFooter file_footer; + if (FileSystem::FSeek64(m_pipeline_disk_cache_file, size - sizeof(PipelineDiskCacheFooter), SEEK_SET) != 0 || + std::fread(&file_footer, sizeof(file_footer), 1, m_pipeline_disk_cache_file) != 1) + { + Log_ErrorPrintf("Failed to read disk cache footer."); + return DiscardPipelineCache(); + } + + PipelineDiskCacheFooter expected_footer; + FillFooter(&expected_footer, m_shader_cache.GetVersion()); + + if (file_footer.version != expected_footer.version || + std::strncmp(file_footer.driver_vendor, expected_footer.driver_vendor, std::size(file_footer.driver_vendor)) != + 0 || + std::strncmp(file_footer.driver_renderer, expected_footer.driver_renderer, + std::size(file_footer.driver_renderer)) != 0 || + std::strncmp(file_footer.driver_version, expected_footer.driver_version, std::size(file_footer.driver_version)) != + 0) + { + Log_ErrorPrintf("Disk cache does not match expected driver/version."); + return DiscardPipelineCache(); + } + + m_pipeline_disk_cache_data_end = static_cast(size) - sizeof(PipelineDiskCacheFooter) - + (sizeof(PipelineDiskCacheIndexEntry) * file_footer.num_programs); + if (m_pipeline_disk_cache_data_end < 0 || + FileSystem::FSeek64(m_pipeline_disk_cache_file, m_pipeline_disk_cache_data_end, SEEK_SET) != 0) + { + Log_ErrorPrintf("Failed to seek to start of index entries."); + return DiscardPipelineCache(); + } + + // Read entries. 
+ for (u32 i = 0; i < file_footer.num_programs; i++) + { + PipelineDiskCacheIndexEntry entry; + if (std::fread(&entry, sizeof(entry), 1, m_pipeline_disk_cache_file) != 1 || + (static_cast(entry.offset) + static_cast(entry.compressed_size)) >= size) + { + Log_ErrorPrintf("Failed to read disk cache entry."); + return DiscardPipelineCache(); + } + + if (m_program_cache.find(entry.key) != m_program_cache.end()) + { + Log_ErrorPrintf("Duplicate program in disk cache."); + return DiscardPipelineCache(); + } + + OpenGLPipeline::ProgramCacheItem pitem; + pitem.file_format = entry.format; + pitem.file_offset = entry.offset; + pitem.file_uncompressed_size = entry.uncompressed_size; + pitem.file_compressed_size = entry.compressed_size; + m_program_cache.emplace(entry.key, pitem); + } + + Log_VerbosePrintf("Read %zu programs from disk cache.", m_program_cache.size()); + return true; +} + +bool OpenGLDevice::GetPipelineCacheData(DynamicHeapArray* data) +{ + // Self-managed. + return false; +} + +GLuint OpenGLDevice::CreateProgramFromPipelineCache(const OpenGLPipeline::ProgramCacheItem& it, + const GPUPipeline::GraphicsConfig& plconfig) +{ + DynamicHeapArray data(it.file_uncompressed_size); + DynamicHeapArray compressed_data(it.file_compressed_size); + + if (FileSystem::FSeek64(m_pipeline_disk_cache_file, it.file_offset, SEEK_SET) != 0 || + std::fread(compressed_data.data(), it.file_compressed_size, 1, m_pipeline_disk_cache_file) != 1) + { + Log_ErrorPrintf("Failed to read program from disk cache."); + return 0; + } + + const size_t decompress_result = + ZSTD_decompress(data.data(), data.size(), compressed_data.data(), compressed_data.size()); + if (ZSTD_isError(decompress_result)) + { + Log_ErrorPrintf("Failed to decompress program from disk cache: %s", ZSTD_getErrorName(decompress_result)); + return 0; + } + compressed_data.deallocate(); + + glGetError(); + GLuint prog = glCreateProgram(); + if (const GLenum err = glGetError(); err != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed 
to create program object: %u", err); + return 0; + } + + glProgramBinary(prog, it.file_format, data.data(), it.file_uncompressed_size); + + GLint link_status; + glGetProgramiv(prog, GL_LINK_STATUS, &link_status); + if (link_status != GL_TRUE) + { + Log_ErrorPrintf("Failed to create GL program from binary: status %d, discarding cache.", link_status); + glDeleteProgram(prog); + return 0; + } + + PostLinkProgram(plconfig, prog); + + return prog; +} + +void OpenGLDevice::AddToPipelineCache(OpenGLPipeline::ProgramCacheItem* it) +{ + DebugAssert(it->program_id != 0 && it->file_uncompressed_size == 0); + DebugAssert(m_pipeline_disk_cache_file); + + GLint binary_size = 0; + glGetProgramiv(it->program_id, GL_PROGRAM_BINARY_LENGTH, &binary_size); + if (binary_size == 0) + { + Log_WarningPrint("glGetProgramiv(GL_PROGRAM_BINARY_LENGTH) returned 0"); + return; + } + + GLenum format = 0; + DynamicHeapArray uncompressed_data(binary_size); + glGetProgramBinary(it->program_id, binary_size, &binary_size, &format, uncompressed_data.data()); + if (binary_size == 0) + { + Log_WarningPrint("glGetProgramBinary() failed"); + return; + } + else if (static_cast(binary_size) != uncompressed_data.size()) + { + Log_WarningPrintf("Size changed from %zu to %d after glGetProgramBinary()", uncompressed_data.size(), binary_size); + } + + DynamicHeapArray compressed_data(ZSTD_compressBound(binary_size)); + const size_t compress_result = + ZSTD_compress(compressed_data.data(), compressed_data.size(), uncompressed_data.data(), binary_size, 0); + if (ZSTD_isError(compress_result)) + { + Log_ErrorPrintf("Failed to compress program: %s", ZSTD_getErrorName(compress_result)); + return; + } + + Log_DevPrintf("Program binary retrieved and compressed, %zu -> %zu bytes, format %u", + static_cast(binary_size), compress_result, format); + + if (FileSystem::FSeek64(m_pipeline_disk_cache_file, m_pipeline_disk_cache_data_end, SEEK_SET) != 0 || + std::fwrite(compressed_data.data(), compress_result, 1, 
m_pipeline_disk_cache_file) != 1) + { + Log_ErrorPrintf("Failed to write binary to disk cache."); + } + + it->file_format = format; + it->file_offset = m_pipeline_disk_cache_data_end; + it->file_uncompressed_size = static_cast(binary_size); + it->file_compressed_size = static_cast(compress_result); + m_pipeline_disk_cache_data_end += static_cast(compress_result); + m_pipeline_disk_cache_changed = true; +} + +bool OpenGLDevice::DiscardPipelineCache() +{ + // Remove any other disk cache entries which haven't been loaded. + for (auto it = m_program_cache.begin(); it != m_program_cache.end();) + { + if (it->second.program_id != 0) + { + it->second.file_format = 0; + it->second.file_offset = 0; + it->second.file_uncompressed_size = 0; + it->second.file_compressed_size = 0; + ++it; + continue; + } + + it = m_program_cache.erase(it); + } + + if (m_pipeline_disk_cache_file) + std::fclose(m_pipeline_disk_cache_file); + + m_pipeline_disk_cache_data_end = 0; + m_pipeline_disk_cache_file = FileSystem::OpenCFile(m_pipeline_disk_cache_filename.c_str(), "w+b"); + if (!m_pipeline_disk_cache_file) + { + Log_ErrorPrintf("Failed to reopen pipeline cache: %d", errno); + m_pipeline_disk_cache_filename = {}; + return false; + } + + return true; +} + +void OpenGLDevice::ClosePipelineCache() +{ + const ScopedGuard file_closer = [this]() { + std::fclose(m_pipeline_disk_cache_file); + m_pipeline_disk_cache_file = nullptr; + }; + + if (!m_pipeline_disk_cache_changed) + { + Log_VerbosePrintf("Not updating pipeline cache because it has not changed."); + return; + } + + if (FileSystem::FSeek64(m_pipeline_disk_cache_file, m_pipeline_disk_cache_data_end, SEEK_SET) != 0) + { + Log_ErrorPrintf("Failed to seek to data end."); + return; + } + + u32 count = 0; + + for (const auto& it : m_program_cache) + { + if (it.second.file_uncompressed_size == 0) + continue; + + PipelineDiskCacheIndexEntry entry; + std::memcpy(&entry.key, &it.first, sizeof(entry.key)); + entry.format = it.second.file_format; + 
entry.offset = it.second.file_offset; + entry.compressed_size = it.second.file_compressed_size; + entry.uncompressed_size = it.second.file_uncompressed_size; + + if (std::fwrite(&entry, sizeof(entry), 1, m_pipeline_disk_cache_file) != 1) + { + Log_ErrorPrintf("Failed to write index entry."); + return; + } + + count++; + } + + PipelineDiskCacheFooter footer; + FillFooter(&footer, m_shader_cache.GetVersion()); + footer.num_programs = count; + + if (std::fwrite(&footer, sizeof(footer), 1, m_pipeline_disk_cache_file) != 1) + Log_ErrorPrintf("Failed to write footer."); +} diff --git a/src/util/opengl_pipeline.h b/src/util/opengl_pipeline.h new file mode 100644 index 000000000..46a6e3ea6 --- /dev/null +++ b/src/util/opengl_pipeline.h @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" +#include "gpu_shader_cache.h" +#include "opengl_loader.h" + +class OpenGLDevice; + +class OpenGLShader final : public GPUShader +{ + friend OpenGLDevice; + +public: + ~OpenGLShader() override; + + void SetDebugName(const std::string_view& name) override; + + ALWAYS_INLINE GLuint GetGLId() const { return m_id; } + ALWAYS_INLINE const GPUShaderCache::CacheIndexKey& GetKey() const { return m_key; } + +private: + OpenGLShader(GPUShaderStage stage, GLuint id, const GPUShaderCache::CacheIndexKey& key); + + GLuint m_id; + GPUShaderCache::CacheIndexKey m_key; +}; + +class OpenGLPipeline final : public GPUPipeline +{ + friend OpenGLDevice; + +public: + static constexpr u32 MAX_VERTEX_ATTRIBUTES = 6; + + struct VertexArrayCacheKey + { + VertexAttribute vertex_attributes[MAX_VERTEX_ATTRIBUTES]; + u32 vertex_attribute_stride; + u32 num_vertex_attributes; + + bool operator==(const VertexArrayCacheKey& rhs) const; + bool operator!=(const VertexArrayCacheKey& rhs) const; + }; + struct VertexArrayCacheItem + { + GLuint vao_id; + u32 reference_count; + }; + struct 
VertexArrayCacheKeyHash + { + size_t operator()(const VertexArrayCacheKey& k) const; + }; + using VertexArrayCache = std::unordered_map; + + struct ProgramCacheKey + { + GPUShaderCache::CacheIndexKey vs_key; + GPUShaderCache::CacheIndexKey fs_key; + VertexArrayCacheKey va_key; + + bool operator==(const ProgramCacheKey& rhs) const; + bool operator!=(const ProgramCacheKey& rhs) const; + }; + static_assert(sizeof(ProgramCacheKey) == 112); // Has no padding + struct ProgramCacheKeyHash + { + size_t operator()(const ProgramCacheKey& k) const; + }; + struct ProgramCacheItem + { + GLuint program_id; + u32 reference_count; + GLenum file_format; + u32 file_offset; + u32 file_compressed_size; + u32 file_uncompressed_size; + }; + using ProgramCache = std::unordered_map; + + static ProgramCacheKey GetProgramCacheKey(const GraphicsConfig& plconfig); + + ~OpenGLPipeline() override; + + ALWAYS_INLINE GLuint GetProgram() const { return m_program; } + ALWAYS_INLINE GLuint GetVAO() const { return m_vao; } + ALWAYS_INLINE const RasterizationState& GetRasterizationState() const { return m_rasterization_state; } + ALWAYS_INLINE const DepthState& GetDepthState() const { return m_depth_state; } + ALWAYS_INLINE const BlendState& GetBlendState() const { return m_blend_state; } + ALWAYS_INLINE GLenum GetTopology() const { return m_topology; } + + void SetDebugName(const std::string_view& name) override; + +private: + OpenGLPipeline(const ProgramCacheKey& key, GLuint program, GLuint vao, const RasterizationState& rs, + const DepthState& ds, const BlendState& bs, GLenum topology); + + ProgramCacheKey m_key; + GLuint m_program; + GLuint m_vao; + BlendState m_blend_state; + RasterizationState m_rasterization_state; + DepthState m_depth_state; + GLenum m_topology; +}; diff --git a/src/common/gl/stream_buffer.cpp b/src/util/opengl_stream_buffer.cpp similarity index 61% rename from src/common/gl/stream_buffer.cpp rename to src/util/opengl_stream_buffer.cpp index a50930cb3..27f2b9051 100644 --- 
a/src/common/gl/stream_buffer.cpp +++ b/src/util/opengl_stream_buffer.cpp @@ -1,57 +1,71 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -#include "stream_buffer.h" -#include "../align.h" -#include "../assert.h" +#include "opengl_stream_buffer.h" + +#include "common/align.h" +#include "common/assert.h" + #include #include -namespace GL { - -StreamBuffer::StreamBuffer(GLenum target, GLuint buffer_id, u32 size) +OpenGLStreamBuffer::OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : m_target(target), m_buffer_id(buffer_id), m_size(size) { } -StreamBuffer::~StreamBuffer() +OpenGLStreamBuffer::~OpenGLStreamBuffer() { glDeleteBuffers(1, &m_buffer_id); } -void StreamBuffer::Bind() +void OpenGLStreamBuffer::Bind() { glBindBuffer(m_target, m_buffer_id); } -void StreamBuffer::Unbind() +void OpenGLStreamBuffer::Unbind() { glBindBuffer(m_target, 0); } -namespace detail { +void OpenGLStreamBuffer::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + { + glObjectLabel(GL_BUFFER, GetGLBufferId(), static_cast(name.length()), + static_cast(name.data())); + } +#endif +} + +namespace { // Uses glBufferSubData() to update. Preferred for drivers which don't support {ARB,EXT}_buffer_storage. 
-class BufferSubDataStreamBuffer final : public StreamBuffer +class BufferSubDataStreamBuffer final : public OpenGLStreamBuffer { public: - ~BufferSubDataStreamBuffer() override = default; + ~BufferSubDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); } MappingResult Map(u32 alignment, u32 min_size) override { - return MappingResult{static_cast(m_cpu_buffer.data()), 0, 0, m_size / alignment}; + return MappingResult{static_cast(m_cpu_buffer), 0, 0, m_size / alignment}; } - void Unmap(u32 used_size) override + u32 Unmap(u32 used_size) override { if (used_size == 0) - return; + return 0; glBindBuffer(m_target, m_buffer_id); - glBufferSubData(m_target, 0, used_size, m_cpu_buffer.data()); + glBufferSubData(m_target, 0, used_size, m_cpu_buffer); + return 0; } - static std::unique_ptr Create(GLenum target, u32 size) + u32 GetChunkSize() const override { return m_size; } + + static std::unique_ptr Create(GLenum target, u32 size) { glGetError(); @@ -63,43 +77,49 @@ public: GLenum err = glGetError(); if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); glDeleteBuffers(1, &buffer_id); return {}; } - return std::unique_ptr(new BufferSubDataStreamBuffer(target, buffer_id, size)); + return std::unique_ptr(new BufferSubDataStreamBuffer(target, buffer_id, size)); } private: - BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) - : StreamBuffer(target, buffer_id, size), m_cpu_buffer(size) + BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { + m_cpu_buffer = static_cast(Common::AlignedMalloc(size, 32)); + if (!m_cpu_buffer) + Panic("Failed to allocate CPU storage for GL buffer"); } - std::vector m_cpu_buffer; + u8* m_cpu_buffer; }; // Uses BufferData() to orphan the buffer after every update. Used on Mali where BufferSubData forces a sync. 
-class BufferDataStreamBuffer final : public StreamBuffer +class BufferDataStreamBuffer final : public OpenGLStreamBuffer { public: - ~BufferDataStreamBuffer() override = default; + ~BufferDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); } MappingResult Map(u32 alignment, u32 min_size) override { - return MappingResult{static_cast(m_cpu_buffer.data()), 0, 0, m_size / alignment}; + return MappingResult{static_cast(m_cpu_buffer), 0, 0, m_size / alignment}; } - void Unmap(u32 used_size) override + u32 Unmap(u32 used_size) override { if (used_size == 0) - return; + return 0; glBindBuffer(m_target, m_buffer_id); - glBufferData(m_target, used_size, m_cpu_buffer.data(), GL_STREAM_DRAW); + glBufferData(m_target, used_size, m_cpu_buffer, GL_STREAM_DRAW); + return 0; } - static std::unique_ptr Create(GLenum target, u32 size) + u32 GetChunkSize() const override { return m_size; } + + static std::unique_ptr Create(GLenum target, u32 size) { glGetError(); @@ -111,24 +131,27 @@ public: GLenum err = glGetError(); if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); glDeleteBuffers(1, &buffer_id); return {}; } - return std::unique_ptr(new BufferDataStreamBuffer(target, buffer_id, size)); + return std::unique_ptr(new BufferDataStreamBuffer(target, buffer_id, size)); } private: - BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) - : StreamBuffer(target, buffer_id, size), m_cpu_buffer(size) + BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { + m_cpu_buffer = static_cast(Common::AlignedMalloc(size, 32)); + if (!m_cpu_buffer) + Panic("Failed to allocate CPU storage for GL buffer"); } - std::vector m_cpu_buffer; + u8* m_cpu_buffer; }; // Base class for implementations which require syncing. 
-class SyncingStreamBuffer : public StreamBuffer +class SyncingStreamBuffer : public OpenGLStreamBuffer { public: enum : u32 @@ -147,13 +170,13 @@ public: protected: SyncingStreamBuffer(GLenum target, GLuint buffer_id, u32 size) - : StreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS) + : OpenGLStreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS) { } - u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; } + ALWAYS_INLINE u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; } - void AddSyncsForOffset(u32 offset) + ALWAYS_INLINE void AddSyncsForOffset(u32 offset) { const u32 end = GetSyncIndexForOffset(offset); for (; m_used_block_index < end; m_used_block_index++) @@ -163,14 +186,14 @@ protected: } } - void WaitForSync(GLsync& sync) + ALWAYS_INLINE void WaitForSync(GLsync& sync) { glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glDeleteSync(sync); sync = nullptr; } - void EnsureSyncsWaitedForOffset(u32 offset) + ALWAYS_INLINE void EnsureSyncsWaitedForOffset(u32 offset) { const u32 end = std::min(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS); for (; m_available_block_index < end; m_available_block_index++) @@ -207,6 +230,8 @@ protected: } } + u32 GetChunkSize() const override { return m_size / NUM_SYNC_POINTS; } + u32 m_position = 0; u32 m_used_block_index = 0; u32 m_available_block_index = NUM_SYNC_POINTS; @@ -221,6 +246,7 @@ public: { glBindBuffer(m_target, m_buffer_id); glUnmapBuffer(m_target); + glBindBuffer(m_target, 0); } MappingResult Map(u32 alignment, u32 min_size) override @@ -236,19 +262,22 @@ public: free_space_in_block / alignment}; } - void Unmap(u32 used_size) override + u32 Unmap(u32 used_size) override { DebugAssert((m_position + used_size) <= m_size); if (!m_coherent) { + // TODO: shouldn't be needed anymore Bind(); glFlushMappedBufferRange(m_target, m_position, used_size); 
} + const u32 prev_position = m_position; m_position += used_size; + return prev_position; } - static std::unique_ptr Create(GLenum target, u32 size, bool coherent = true) + static std::unique_ptr Create(GLenum target, u32 size, bool coherent = true) { glGetError(); @@ -266,14 +295,16 @@ public: GLenum err = glGetError(); if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); glDeleteBuffers(1, &buffer_id); return {}; } u8* mapped_ptr = static_cast(glMapBufferRange(target, 0, size, map_flags)); - Assert(mapped_ptr); + AssertMsg(mapped_ptr, "Persistent buffer was mapped"); - return std::unique_ptr(new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr, coherent)); + return std::unique_ptr( + new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr, coherent)); } private: @@ -286,14 +317,14 @@ private: bool m_coherent; }; -} // namespace detail +} // namespace -std::unique_ptr StreamBuffer::Create(GLenum target, u32 size) +std::unique_ptr OpenGLStreamBuffer::Create(GLenum target, u32 size) { - std::unique_ptr buf; + std::unique_ptr buf; if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) { - buf = detail::BufferStorageStreamBuffer::Create(target, size); + buf = BufferStorageStreamBuffer::Create(target, size); if (buf) return buf; } @@ -304,13 +335,11 @@ std::unique_ptr StreamBuffer::Create(GLenum target, u32 size) if (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0) { // Mali and Adreno drivers can't do sub-buffer tracking... 
- return detail::BufferDataStreamBuffer::Create(target, size); + return BufferDataStreamBuffer::Create(target, size); } - return detail::BufferSubDataStreamBuffer::Create(target, size); + return BufferSubDataStreamBuffer::Create(target, size); #else - return detail::BufferDataStreamBuffer::Create(target, size); + return BufferDataStreamBuffer::Create(target, size); #endif } - -} // namespace GL diff --git a/src/common/gl/stream_buffer.h b/src/util/opengl_stream_buffer.h similarity index 52% rename from src/common/gl/stream_buffer.h rename to src/util/opengl_stream_buffer.h index c7a35e0d6..5a2c65d7f 100644 --- a/src/common/gl/stream_buffer.h +++ b/src/util/opengl_stream_buffer.h @@ -1,18 +1,21 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "../types.h" -#include "loader.h" + +#include "opengl_loader.h" + +#include "common/types.h" + #include +#include #include #include -namespace GL { -class StreamBuffer +class OpenGLStreamBuffer { public: - virtual ~StreamBuffer(); + virtual ~OpenGLStreamBuffer(); ALWAYS_INLINE GLuint GetGLBufferId() const { return m_buffer_id; } ALWAYS_INLINE GLenum GetGLTarget() const { return m_target; } @@ -21,6 +24,8 @@ public: void Bind(); void Unbind(); + void SetDebugName(const std::string_view& name); + struct MappingResult { void* pointer; @@ -30,15 +35,19 @@ public: }; virtual MappingResult Map(u32 alignment, u32 min_size) = 0; - virtual void Unmap(u32 used_size) = 0; - static std::unique_ptr Create(GLenum target, u32 size); + /// Returns the position in the buffer *before* the start of used_size. + virtual u32 Unmap(u32 used_size) = 0; + + /// Returns the minimum granularity of blocks which sync objects will be created around. 
+ virtual u32 GetChunkSize() const = 0; + + static std::unique_ptr Create(GLenum target, u32 size); protected: - StreamBuffer(GLenum target, GLuint buffer_id, u32 size); + OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size); GLenum m_target; GLuint m_buffer_id; u32 m_size; }; -} // namespace GL \ No newline at end of file diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp new file mode 100644 index 000000000..352d7863f --- /dev/null +++ b/src/util/opengl_texture.cpp @@ -0,0 +1,688 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "opengl_texture.h" +#include "opengl_device.h" +#include "opengl_stream_buffer.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/log.h" +#include "common/string_util.h" + +#include +#include +#include + +Log_SetChannel(OpenGLDevice); + +// Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems +// to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. +static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; + +// The pitch alignment must be less or equal to the upload alignment. +// We need 32 here for AVX2, so 64 is also fine. 
+static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; + +const std::tuple& OpenGLTexture::GetPixelFormatMapping(GPUTexture::Format format) +{ + static constexpr std::array, static_cast(GPUTexture::Format::MaxCount)> + mapping = {{ + {}, // Unknown + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16 + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R32I, GL_RED, GL_INT}, // R32I + {GL_R32UI, GL_RED, GL_UNSIGNED_INT}, // R32U + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8 + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA16 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + }}; + + return mapping[static_cast(format)]; +} + +OpenGLTexture::OpenGLTexture() = default; + +OpenGLTexture::~OpenGLTexture() +{ + Destroy(); +} + +bool OpenGLTexture::UseTextureStorage(bool multisampled) +{ + return GLAD_GL_ARB_texture_storage || (multisampled ? 
GLAD_GL_ES_VERSION_3_1 : GLAD_GL_ES_VERSION_3_0); +} + +bool OpenGLTexture::UseTextureStorage() const +{ + return UseTextureStorage(IsMultisampled()); +} + +bool OpenGLTexture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + const void* data, u32 data_pitch) +{ + if (!ValidateConfig(width, height, layers, levels, samples, type, format)) + return false; + + if (layers > 1 && data) + { + Log_ErrorPrintf("Loading texture array data not currently supported"); + return false; + } + + const GLenum target = + ((samples > 1) ? GL_TEXTURE_2D_MULTISAMPLE : ((layers > 1) ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D)); + const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(format); + + OpenGLDevice::BindUpdateTextureUnit(); + + glGetError(); + + GLuint id; + glGenTextures(1, &id); + glBindTexture(target, id); + + if (samples > 1) + { + Assert(!data); + if (UseTextureStorage(true)) + { + glTexStorage2DMultisample(target, samples, gl_internal_format, width, height, GL_FALSE); + } + else + { + glTexImage2DMultisample(target, samples, gl_internal_format, width, height, GL_FALSE); + } + + glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels); + } + else + { + if (UseTextureStorage(false)) + { + if (layers > 1) + glTexStorage3D(target, levels, gl_internal_format, width, height, layers); + else + glTexStorage2D(target, levels, gl_internal_format, width, height); + + if (data) + { + // TODO: Fix data for mipmaps here. + if (layers > 1) + glTexSubImage3D(target, 0, 0, 0, 0, width, height, layers, gl_format, gl_type, data); + else + glTexSubImage2D(target, 0, 0, 0, width, height, gl_format, gl_type, data); + } + } + else + { + for (u32 i = 0; i < levels; i++) + { + // TODO: Fix data pointer here. 
+ if (layers > 1) + glTexImage3D(target, i, gl_internal_format, width, height, layers, 0, gl_format, gl_type, data); + else + glTexImage2D(target, i, gl_internal_format, width, height, 0, gl_format, gl_type, data); + } + + glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels); + } + } + + GLenum error = glGetError(); + if (error != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create texture: 0x%X", error); + glDeleteTextures(1, &id); + return false; + } + + if (IsValid()) + Destroy(); + + m_id = id; + m_width = static_cast(width); + m_height = static_cast(height); + m_layers = static_cast(layers); + m_levels = static_cast(levels); + m_samples = static_cast(samples); + m_type = type; + m_format = format; + m_state = GPUTexture::State::Dirty; + return true; +} + +void OpenGLTexture::Destroy() +{ + if (m_id != 0) + { + OpenGLDevice::GetInstance().UnbindTexture(m_id); + glDeleteTextures(1, &m_id); + m_id = 0; + } + + ClearBaseProperties(); +} + +void OpenGLTexture::CommitClear() +{ + OpenGLDevice::GetInstance().CommitClear(this); +} + +bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, + u32 level /*= 0*/) +{ + // TODO: perf counters + + // Worth using the PBO? Driver probably knows better... 
+ const GLenum target = GetGLTarget(); + const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format); + const u32 preferred_pitch = + Common::AlignUpPow2(static_cast(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 map_size = preferred_pitch * static_cast(height); + OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); + + CommitClear(); + + OpenGLDevice::BindUpdateTextureUnit(); + glBindTexture(target, m_id); + + if (!sb || map_size > sb->GetChunkSize()) + { + GL_INS("Not using PBO for map size %u", map_size); + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize()); + glTextureSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data); + } + else + { + const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); + StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * GetPixelSize(), height); + sb->Unmap(map_size); + sb->Bind(); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / GetPixelSize()); + glTextureSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type, + reinterpret_cast(static_cast(map.buffer_offset))); + + sb->Unbind(); + } + + glBindTexture(target, 0); + return true; +} + +bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, + u32 level /*= 0*/) +{ + if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) + return false; + + const u32 pitch = Common::AlignUpPow2(static_cast(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = pitch * static_cast(height); + OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); + if (!sb || upload_size > sb->GetSize()) + return false; + + const auto res = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, upload_size); + *map = res.pointer; + *map_stride = pitch; + + m_map_offset = res.buffer_offset; + m_map_x = static_cast(x); + m_map_y = static_cast(y); + 
m_map_width = static_cast(width); + m_map_height = static_cast(height); + m_map_layer = static_cast(layer); + m_map_level = static_cast(level); + return true; +} + +void OpenGLTexture::Unmap() +{ + CommitClear(); + + const u32 pitch = Common::AlignUpPow2(static_cast(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = pitch * static_cast(m_map_height); + OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); + sb->Unmap(upload_size); + sb->Bind(); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, m_map_width); + + OpenGLDevice::BindUpdateTextureUnit(); + + const GLenum target = GetGLTarget(); + glBindTexture(target, m_id); + + const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format); + if (IsTextureArray()) + { + glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, + gl_type, reinterpret_cast(static_cast(m_map_offset))); + } + else + { + glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, + reinterpret_cast(static_cast(m_map_offset))); + } + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + sb->Unbind(); + + glBindTexture(target, 0); +} + +void OpenGLTexture::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + glObjectLabel(GL_TEXTURE, m_id, static_cast(name.length()), static_cast(name.data())); +#endif +} + +#if 0 +// If we don't have border clamp.. too bad, just hope for the best. 
+if (!m_gl_context->IsGLES() || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_NV_texture_border_clamp || + GLAD_GL_EXT_texture_border_clamp || GLAD_GL_OES_texture_border_clamp) +#endif + +////////////////////////////////////////////////////////////////////////// + +OpenGLSampler::OpenGLSampler(GLuint id) : GPUSampler(), m_id(id) +{ +} + +OpenGLSampler::~OpenGLSampler() +{ + OpenGLDevice::GetInstance().UnbindSampler(m_id); +} + +void OpenGLSampler::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + glObjectLabel(GL_SAMPLER, m_id, static_cast(name.length()), static_cast(name.data())); +#endif +} + +std::unique_ptr OpenGLDevice::CreateSampler(const GPUSampler::Config& config) +{ + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + GL_REPEAT, // Repeat + GL_CLAMP_TO_EDGE, // ClampToEdge + GL_CLAMP_TO_BORDER, // ClampToBorder + }}; + + // [mipmap_on_off][mipmap][filter] + static constexpr GLenum filters[2][2][2] = { + { + // mipmap=off + {GL_NEAREST, GL_LINEAR}, // mipmap=nearest + {GL_NEAREST, GL_LINEAR}, // mipmap=linear + }, + { + // mipmap=on + {GL_NEAREST_MIPMAP_NEAREST, GL_LINEAR_MIPMAP_NEAREST}, // mipmap=nearest + {GL_NEAREST_MIPMAP_LINEAR, GL_LINEAR_MIPMAP_LINEAR}, // mipmap=linear + }, + }; + + GLuint sampler; + glGetError(); + glGenSamplers(1, &sampler); + if (glGetError() != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create sampler: %u", sampler); + return {}; + } + + glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, ta[static_cast(config.address_u.GetValue())]); + glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, ta[static_cast(config.address_v.GetValue())]); + glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, ta[static_cast(config.address_w.GetValue())]); + const u8 mipmap_on_off = (config.min_lod != 0 || config.max_lod != 0); + glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, + filters[mipmap_on_off][static_cast(config.mip_filter.GetValue())] + [static_cast(config.min_filter.GetValue())]); + 
glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, + filters[mipmap_on_off][static_cast(config.mip_filter.GetValue())] + [static_cast(config.mag_filter.GetValue())]); + glSamplerParameterf(sampler, GL_TEXTURE_MIN_LOD, static_cast(config.min_lod)); + glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, static_cast(config.max_lod)); + glSamplerParameterfv(sampler, GL_TEXTURE_BORDER_COLOR, config.GetBorderFloatColor().data()); + if (config.anisotropy) + { + // TODO + } + + return std::unique_ptr(new OpenGLSampler(sampler)); +} + +////////////////////////////////////////////////////////////////////////// + +OpenGLFramebuffer::OpenGLFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, GLuint id) + : GPUFramebuffer(rt, ds, width, height), m_id(id) +{ +} + +OpenGLFramebuffer::~OpenGLFramebuffer() +{ + OpenGLDevice::GetInstance().UnbindFramebuffer(this); +} + +void OpenGLFramebuffer::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + glObjectLabel(GL_FRAMEBUFFER, m_id, static_cast(name.length()), static_cast(name.data())); +#endif +} + +void OpenGLFramebuffer::Bind(GLenum target) +{ + glBindFramebuffer(target, m_id); +} + +std::unique_ptr OpenGLDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /* = nullptr */) +{ + glGetError(); + + GLuint fbo_id; + glGenFramebuffers(1, &fbo_id); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_id); + + DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); + OpenGLTexture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); + OpenGLTexture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? 
rt_or_ds : ds); + if (RT) + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, RT->GetGLTarget(), RT->GetGLId(), 0); + if (DS) + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0); + + if (glGetError() != GL_NO_ERROR || glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + Log_ErrorPrintf("Failed to create GL framebuffer: %u", glGetError()); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); + glDeleteFramebuffers(1, &fbo_id); + return {}; + } + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); + return std::unique_ptr(new OpenGLFramebuffer(RT, DS, RT ? RT->GetWidth() : DS->GetWidth(), + RT ? RT->GetHeight() : DS->GetHeight(), fbo_id)); +} + +void OpenGLDevice::CommitClear(OpenGLTexture* tex) +{ + switch (tex->GetState()) + { + case GPUTexture::State::Invalidated: + { + tex->SetState(GPUTexture::State::Dirty); + + if (glInvalidateTexImage) + { + glInvalidateTexImage(tex->GetGLId(), 0); + } + else if (glInvalidateFramebuffer) + { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); + + const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0); + + glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? 
m_current_framebuffer->GetGLId() : 0); + } + } + break; + + case GPUTexture::State::Cleared: + { + tex->SetState(GPUTexture::State::Dirty); + + if (glClearTexImage) + { + const auto [gl_internal_format, gl_format, gl_type] = OpenGLTexture::GetPixelFormatMapping(tex->GetFormat()); + glClearTexImage(tex->GetGLId(), 0, gl_format, gl_type, &tex->GetClearValue()); + } + else + { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); + + const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0); + + glDisable(GL_SCISSOR_TEST); + if (tex->IsDepthStencil()) + { + glClearDepth(tex->GetClearDepth()); + glClear(GL_DEPTH_BUFFER_BIT); + } + else + { + const auto color = tex->GetUNormClearColor(); + glClearColor(color[0], color[1], color[2], color[3]); + glClear(GL_COLOR_BUFFER_BIT); + } + glEnable(GL_SCISSOR_TEST); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? 
m_current_framebuffer->GetGLId() : 0); + } + } + break; + + case GPUTexture::State::Dirty: + break; + + default: + UnreachableCode(); + break; + } +} + +void OpenGLDevice::CommitClear(OpenGLFramebuffer* fb) +{ + GLenum clear_flags = 0; + GLenum invalidate_attachments[2]; + GLuint num_invalidate_attachments = 0; + + if (OpenGLTexture* FB = static_cast(fb->GetRT())) + { + switch (FB->GetState()) + { + case GPUTexture::State::Invalidated: + { + invalidate_attachments[num_invalidate_attachments++] = GL_COLOR_ATTACHMENT0; + FB->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Cleared: + { + const auto color = FB->GetUNormClearColor(); + glClearColor(color[0], color[1], color[2], color[3]); + clear_flags |= GL_COLOR_BUFFER_BIT; + FB->SetState(GPUTexture::State::Dirty); + } + + case GPUTexture::State::Dirty: + break; + + default: + UnreachableCode(); + break; + } + } + if (OpenGLTexture* DS = static_cast(fb->GetDS())) + { + switch (DS->GetState()) + { + case GPUTexture::State::Invalidated: + { + invalidate_attachments[num_invalidate_attachments++] = GL_DEPTH_ATTACHMENT; + DS->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Cleared: + { + glClearDepth(DS->GetClearDepth()); + clear_flags |= GL_DEPTH_BUFFER_BIT; + DS->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + break; + + default: + UnreachableCode(); + break; + } + } + + if (clear_flags != 0) + { + glDisable(GL_SCISSOR_TEST); + glClear(clear_flags); + glEnable(GL_SCISSOR_TEST); + } + if (num_invalidate_attachments > 0 && glInvalidateFramebuffer) + glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, num_invalidate_attachments, invalidate_attachments); +} + +////////////////////////////////////////////////////////////////////////// + +OpenGLTextureBuffer::OpenGLTextureBuffer(Format format, u32 size_in_elements, + std::unique_ptr buffer, GLuint texture_id) + : GPUTextureBuffer(format, size_in_elements), m_buffer(std::move(buffer)), 
m_texture_id(texture_id) +{ +} + +OpenGLTextureBuffer::~OpenGLTextureBuffer() +{ + OpenGLDevice& dev = OpenGLDevice::GetInstance(); + if (m_texture_id != 0) + { + dev.UnbindTexture(m_texture_id); + glDeleteTextures(1, &m_texture_id); + } + else if (dev.GetFeatures().texture_buffers_emulated_with_ssbo && m_buffer) + { + dev.UnbindSSBO(m_buffer->GetGLBufferId()); + } +} + +bool OpenGLTextureBuffer::CreateBuffer() +{ + const bool use_ssbo = OpenGLDevice::GetInstance().GetFeatures().texture_buffers_emulated_with_ssbo; + + const GLenum target = (use_ssbo ? GL_SHADER_STORAGE_BUFFER : GL_TEXTURE_BUFFER); + m_buffer = OpenGLStreamBuffer::Create(target, GetSizeInBytes()); + if (!m_buffer) + return false; + + if (!use_ssbo) + { + glGetError(); + glGenTextures(1, &m_texture_id); + if (const GLenum err = glGetError(); err != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create texture for buffer: %u", err); + return false; + } + + OpenGLDevice::BindUpdateTextureUnit(); + glBindTexture(GL_TEXTURE_BUFFER, m_texture_id); + glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_buffer->GetGLBufferId()); + } + + m_buffer->Unbind(); + + return true; +} + +void* OpenGLTextureBuffer::Map(u32 required_elements) +{ + const u32 esize = GetElementSize(m_format); + const auto map = m_buffer->Map(esize, esize * required_elements); + m_current_position = map.index_aligned; + return map.pointer; +} + +void OpenGLTextureBuffer::Unmap(u32 used_elements) +{ + m_buffer->Unmap(used_elements * GetElementSize(m_format)); +} + +void OpenGLTextureBuffer::SetDebugName(const std::string_view& name) +{ +#ifdef _DEBUG + if (glObjectLabel) + { + glObjectLabel(GL_TEXTURE, m_buffer->GetGLBufferId(), static_cast(name.length()), + static_cast(name.data())); + } +#endif +} + +std::unique_ptr OpenGLDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) +{ + const bool use_ssbo = OpenGLDevice::GetInstance().GetFeatures().texture_buffers_emulated_with_ssbo; + const u32 buffer_size = 
GPUTextureBuffer::GetElementSize(format) * size_in_elements; + + if (use_ssbo) + { + GLint64 max_ssbo_size = 0; + glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size); + if (static_cast(buffer_size) > max_ssbo_size) + { + Log_ErrorPrintf("Buffer size of %u not supported, max is %" PRId64, buffer_size, max_ssbo_size); + return {}; + } + } + + const GLenum target = (use_ssbo ? GL_SHADER_STORAGE_BUFFER : GL_TEXTURE_BUFFER); + std::unique_ptr buffer = OpenGLStreamBuffer::Create(target, buffer_size); + if (!buffer) + return {}; + buffer->Unbind(); + + GLuint texture_id = 0; + if (!use_ssbo) + { + glGetError(); + glGenTextures(1, &texture_id); + if (const GLenum err = glGetError(); err != GL_NO_ERROR) + { + Log_ErrorPrintf("Failed to create texture for buffer: %u", err); + return {}; + } + + OpenGLDevice::BindUpdateTextureUnit(); + glBindTexture(GL_TEXTURE_BUFFER, texture_id); + glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, buffer->GetGLBufferId()); + } + + return std::unique_ptr( + new OpenGLTextureBuffer(format, size_in_elements, std::move(buffer), texture_id)); +} diff --git a/src/util/opengl_texture.h b/src/util/opengl_texture.h new file mode 100644 index 000000000..3cea4f292 --- /dev/null +++ b/src/util/opengl_texture.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once +#include "gpu_device.h" +#include "gpu_texture.h" +#include "opengl_loader.h" +#include + +class OpenGLDevice; +class OpenGLStreamBuffer; + +class OpenGLTexture final : public GPUTexture +{ + friend OpenGLDevice; + +public: + OpenGLTexture(); + OpenGLTexture(const OpenGLTexture&) = delete; + ~OpenGLTexture(); + + static bool UseTextureStorage(bool multisampled); + static const std::tuple& GetPixelFormatMapping(Format format); + + ALWAYS_INLINE GLuint GetGLId() const { return m_id; } + bool IsValid() const override { return m_id != 0; } + bool Update(u32 x, u32 y, u32 width, u32 height, const 
void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; + bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; + void Unmap() override; + + void SetDebugName(const std::string_view& name) override; + + bool Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + const void* data = nullptr, u32 data_pitch = 0); + void Destroy(); + + bool UseTextureStorage() const; + + ALWAYS_INLINE GLenum GetGLTarget() const + { + return (IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE : (IsTextureArray() ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D)); + } + + void CommitClear(); + + OpenGLTexture& operator=(const OpenGLTexture&) = delete; + +private: + GLuint m_id = 0; + + u32 m_map_offset = 0; + u16 m_map_x = 0; + u16 m_map_y = 0; + u16 m_map_width = 0; + u16 m_map_height = 0; + u8 m_map_layer = 0; + u8 m_map_level = 0; +}; + +class OpenGLFramebuffer final : public GPUFramebuffer +{ + friend OpenGLDevice; + +public: + ~OpenGLFramebuffer() override; + + ALWAYS_INLINE GLuint GetGLId() const { return m_id; } + + void SetDebugName(const std::string_view& name) override; + + void Bind(GLenum target); + +private: + OpenGLFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, GLuint id); + + GLuint m_id; +}; + +class OpenGLTextureBuffer final : public GPUTextureBuffer +{ + friend OpenGLDevice; + +public: + ~OpenGLTextureBuffer() override; + + ALWAYS_INLINE OpenGLStreamBuffer* GetBuffer() const { return m_buffer.get(); } + ALWAYS_INLINE GLuint GetTextureId() const { return m_texture_id; } + + bool CreateBuffer(); + + // Inherited via GPUTextureBuffer + void* Map(u32 required_elements) override; + void Unmap(u32 used_elements) override; + + void SetDebugName(const std::string_view& name) override; + +private: + OpenGLTextureBuffer(Format format, u32 size_in_elements, std::unique_ptr buffer, + GLuint texture_id); + + std::unique_ptr m_buffer; + GLuint m_texture_id; +}; + +class 
OpenGLSampler final : public GPUSampler +{ + friend OpenGLDevice; + +public: + ~OpenGLSampler() override; + + ALWAYS_INLINE GLuint GetID() const { return m_id; } + + void SetDebugName(const std::string_view& name) override; + +private: + OpenGLSampler(GLuint id); + + GLuint m_id; +}; diff --git a/src/util/platform_misc.h b/src/util/platform_misc.h index 8d9d496dc..2fe9e3c5c 100644 --- a/src/util/platform_misc.h +++ b/src/util/platform_misc.h @@ -1,8 +1,6 @@ // SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -#include "common/window_info.h" - namespace FrontendCommon { void SuspendScreensaver(); void ResumeScreensaver(); diff --git a/src/util/postprocessing_chain.cpp b/src/util/postprocessing_chain.cpp index ddd8858af..d6fc1887a 100644 --- a/src/util/postprocessing_chain.cpp +++ b/src/util/postprocessing_chain.cpp @@ -1,50 +1,60 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "postprocessing_chain.h" +#include "gpu_device.h" +#include "postprocessing_shader_glsl.h" + +#include "core/host.h" +#include "core/settings.h" + #include "common/assert.h" #include "common/file_system.h" #include "common/log.h" -#include "common/string.h" #include "common/path.h" -#include "core/host.h" -#include "core/settings.h" +#include "common/string.h" + #include "fmt/format.h" #include + Log_SetChannel(PostProcessingChain); -namespace FrontendCommon { - -static bool TryLoadingShader(PostProcessingShader* shader, const std::string_view& shader_name) +static std::unique_ptr TryLoadingShader(const std::string_view& shader_name) { std::string filename(Path::Combine(EmuFolders::Shaders, fmt::format("{}.glsl", shader_name))); if (FileSystem::FileExists(filename.c_str())) { + std::unique_ptr shader = std::make_unique(); if (shader->LoadFromFile(std::string(shader_name), filename.c_str())) - return 
true; + return shader; } - std::optional resource_str(Host::ReadResourceFileToString(fmt::format("shaders" FS_OSPATH_SEPARATOR_STR "{}.glsl", shader_name).c_str())); - if (resource_str.has_value() && shader->LoadFromString(std::string(shader_name), std::move(resource_str.value()))) - return true; + std::optional resource_str( + Host::ReadResourceFileToString(fmt::format("shaders" FS_OSPATH_SEPARATOR_STR "{}.glsl", shader_name).c_str())); + if (resource_str.has_value()) + { + std::unique_ptr shader = std::make_unique(); + if (shader->LoadFromString(std::string(shader_name), std::move(resource_str.value()))) + return shader; + } - Log_ErrorPrintf("Failed to load shader from '%s'", filename.c_str()); - return false; + Log_ErrorPrintf(fmt::format("Failed to load shader '{}'", shader_name).c_str()); + return {}; } PostProcessingChain::PostProcessingChain() = default; PostProcessingChain::~PostProcessingChain() = default; -void PostProcessingChain::AddShader(PostProcessingShader shader) +void PostProcessingChain::AddShader(std::unique_ptr shader) { m_shaders.push_back(std::move(shader)); } bool PostProcessingChain::AddStage(const std::string_view& name) { - PostProcessingShader shader; - if (!TryLoadingShader(&shader, name)) + std::unique_ptr shader = TryLoadingShader(name); + if (!shader) return false; m_shaders.push_back(std::move(shader)); @@ -56,15 +66,15 @@ std::string PostProcessingChain::GetConfigString() const std::stringstream ss; bool first = true; - for (const PostProcessingShader& shader : m_shaders) + for (const auto& shader : m_shaders) { if (!first) ss << ':'; else first = false; - ss << shader.GetName(); - std::string config_string = shader.GetConfigString(); + ss << shader->GetName(); + std::string config_string = shader->GetConfigString(); if (!config_string.empty()) ss << ';' << config_string; } @@ -74,7 +84,7 @@ std::string PostProcessingChain::GetConfigString() const bool PostProcessingChain::CreateFromString(const std::string_view& chain_config) { - 
std::vector shaders; + std::vector> shaders; size_t last_sep = 0; while (last_sep < chain_config.size()) @@ -91,12 +101,12 @@ bool PostProcessingChain::CreateFromString(const std::string_view& chain_config) const std::string_view shader_name = shader_config.substr(0, first_shader_sep); if (!shader_name.empty()) { - PostProcessingShader shader; - if (!TryLoadingShader(&shader, shader_name)) + std::unique_ptr shader = TryLoadingShader(shader_name); + if (!shader) return false; if (first_shader_sep < shader_config.size()) - shader.SetConfigString(shader_config.substr(first_shader_sep + 1)); + shader->SetConfigString(shader_config.substr(first_shader_sep + 1)); shaders.push_back(std::move(shader)); } @@ -161,7 +171,7 @@ void PostProcessingChain::MoveStageUp(u32 index) if (index == 0) return; - PostProcessingShader shader = std::move(m_shaders[index]); + auto shader = std::move(m_shaders[index]); m_shaders.erase(m_shaders.begin() + index); m_shaders.insert(m_shaders.begin() + (index - 1u), std::move(shader)); } @@ -172,7 +182,7 @@ void PostProcessingChain::MoveStageDown(u32 index) if (index == (m_shaders.size() - 1u)) return; - PostProcessingShader shader = std::move(m_shaders[index]); + auto shader = std::move(m_shaders[index]); m_shaders.erase(m_shaders.begin() + index); m_shaders.insert(m_shaders.begin() + (index + 1u), std::move(shader)); } @@ -182,4 +192,88 @@ void PostProcessingChain::ClearStages() m_shaders.clear(); } -} // namespace FrontendCommon \ No newline at end of file +bool PostProcessingChain::CheckTargets(GPUTexture::Format format, u32 target_width, u32 target_height) +{ + if (m_target_format == format && m_target_width == target_width && m_target_height == target_height) + return true; + + // In case any allocs fail. 
+ m_target_format = GPUTexture::Format::Unknown; + m_target_width = 0; + m_target_height = 0; + m_output_framebuffer.reset(); + m_output_texture.reset(); + m_input_framebuffer.reset(); + m_input_texture.reset(); + + if (!(m_input_texture = + g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget, format)) || + !(m_input_framebuffer = g_gpu_device->CreateFramebuffer(m_input_texture.get()))) + { + return false; + } + + if (!(m_output_texture = + g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget, format)) || + !(m_output_framebuffer = g_gpu_device->CreateFramebuffer(m_output_texture.get()))) + { + return false; + } + + for (auto& shader : m_shaders) + { + if (!shader->CompilePipeline(format, target_width, target_height) || + !shader->ResizeOutput(format, target_width, target_height)) + { + Log_ErrorPrintf("Failed to compile one or more post-processing shaders, disabling."); + return false; + } + } + + m_target_format = format; + m_target_width = target_width; + m_target_height = target_height; + + return true; +} + +bool PostProcessingChain::Apply(GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, + s32 final_height, s32 orig_width, s32 orig_height) +{ + GL_SCOPE("PostProcessingChain Apply"); + + const u32 target_width = final_target ? final_target->GetWidth() : g_gpu_device->GetWindowWidth(); + const u32 target_height = final_target ? final_target->GetHeight() : g_gpu_device->GetWindowHeight(); + const GPUTexture::Format target_format = + final_target ? 
final_target->GetRT()->GetFormat() : g_gpu_device->GetWindowFormat(); + if (!CheckTargets(target_format, target_width, target_height)) + return false; + + g_gpu_device->SetViewportAndScissor(final_left, final_top, final_width, final_height); + + GPUTexture* input = m_input_texture.get(); + GPUFramebuffer* input_fb = m_input_framebuffer.get(); + GPUTexture* output = m_output_texture.get(); + GPUFramebuffer* output_fb = m_output_framebuffer.get(); + input->MakeReadyForSampling(); + + for (const std::unique_ptr& stage : m_shaders) + { + const bool is_final = (stage.get() == m_shaders.back().get()); + + if (!stage->Apply(input, is_final ? nullptr : output_fb, final_left, final_top, final_width, final_height, + orig_width, orig_height, m_target_width, m_target_height)) + { + return false; + } + + if (!is_final) + { + output->MakeReadyForSampling(); + std::swap(input, output); + std::swap(input_fb, output_fb); + } + } + + return true; +} diff --git a/src/util/postprocessing_chain.h b/src/util/postprocessing_chain.h index f9b2de53c..a49b0421e 100644 --- a/src/util/postprocessing_chain.h +++ b/src/util/postprocessing_chain.h @@ -1,12 +1,19 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once +#include "gpu_device.h" #include "postprocessing_shader.h" + +#include "common/timer.h" + +#include #include #include -namespace FrontendCommon { +class GPUSampler; +class GPUFramebuffer; +class GPUTexture; class PostProcessingChain { @@ -14,12 +21,16 @@ public: PostProcessingChain(); ~PostProcessingChain(); + static std::vector GetAvailableShaderNames(); + ALWAYS_INLINE bool IsEmpty() const { return m_shaders.empty(); } ALWAYS_INLINE u32 GetStageCount() const { return static_cast(m_shaders.size()); } - ALWAYS_INLINE const PostProcessingShader& GetShaderStage(u32 i) const { return m_shaders[i]; } - ALWAYS_INLINE PostProcessingShader& GetShaderStage(u32 i) 
{ return m_shaders[i]; } + ALWAYS_INLINE const PostProcessingShader* GetShaderStage(u32 i) const { return m_shaders[i].get(); } + ALWAYS_INLINE PostProcessingShader* GetShaderStage(u32 i) { return m_shaders[i].get(); } + ALWAYS_INLINE GPUTexture* GetInputTexture() const { return m_input_texture.get(); } + ALWAYS_INLINE GPUFramebuffer* GetInputFramebuffer() const { return m_input_framebuffer.get(); } - void AddShader(PostProcessingShader shader); + void AddShader(std::unique_ptr shader); bool AddStage(const std::string_view& name); void RemoveStage(u32 index); void MoveStageUp(u32 index); @@ -30,10 +41,21 @@ public: bool CreateFromString(const std::string_view& chain_config); - static std::vector GetAvailableShaderNames(); + bool CheckTargets(GPUTexture::Format target_format, u32 target_width, u32 target_height); + + bool Apply(GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, + s32 orig_width, s32 orig_height); private: - std::vector m_shaders; -}; + std::vector> m_shaders; -} // namespace FrontendCommon + GPUTexture::Format m_target_format = GPUTexture::Format::Unknown; + u32 m_target_width = 0; + u32 m_target_height = 0; + + std::unique_ptr m_input_texture; + std::unique_ptr m_input_framebuffer; + + std::unique_ptr m_output_texture; + std::unique_ptr m_output_framebuffer; +}; diff --git a/src/util/postprocessing_shader.cpp b/src/util/postprocessing_shader.cpp index 6eb803bb3..7e4bee367 100644 --- a/src/util/postprocessing_shader.cpp +++ b/src/util/postprocessing_shader.cpp @@ -2,18 +2,18 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "postprocessing_shader.h" + #include "common/file_system.h" #include "common/log.h" #include "common/string_util.h" -#include "shadergen.h" + #include #include #include + Log_SetChannel(PostProcessingShader); -namespace FrontendCommon { - -void ParseKeyValue(const std::string_view& line, std::string_view* key, std::string_view* value) +void 
PostProcessingShader::ParseKeyValue(const std::string_view& line, std::string_view* key, std::string_view* value) { size_t key_start = 0; while (key_start < line.size() && std::isspace(line[key_start])) @@ -49,7 +49,7 @@ void ParseKeyValue(const std::string_view& line, std::string_view* key, std::str } template -u32 ParseVector(const std::string_view& line, PostProcessingShader::Option::ValueVector* values) +u32 PostProcessingShader::ParseVector(const std::string_view& line, PostProcessingShader::Option::ValueVector* values) { u32 index = 0; size_t start = 0; @@ -88,46 +88,22 @@ u32 ParseVector(const std::string_view& line, PostProcessingShader::Option::Valu return size; } +template u32 PostProcessingShader::ParseVector(const std::string_view& line, + PostProcessingShader::Option::ValueVector* values); +template u32 PostProcessingShader::ParseVector(const std::string_view& line, + PostProcessingShader::Option::ValueVector* values); + PostProcessingShader::PostProcessingShader() = default; -PostProcessingShader::PostProcessingShader(std::string name, std::string code) : m_name(name), m_code(code) -{ - LoadOptions(); -} - -PostProcessingShader::PostProcessingShader(const PostProcessingShader& copy) - : m_name(copy.m_name), m_code(copy.m_code), m_options(copy.m_options) -{ -} - -PostProcessingShader::PostProcessingShader(PostProcessingShader& move) - : m_name(std::move(move.m_name)), m_code(std::move(move.m_code)), m_options(std::move(move.m_options)) +PostProcessingShader::PostProcessingShader(std::string name) : m_name(std::move(name)) { } PostProcessingShader::~PostProcessingShader() = default; -bool PostProcessingShader::LoadFromFile(std::string name, const char* filename) -{ - std::optional code = FileSystem::ReadFileToString(filename); - if (!code.has_value() || code->empty()) - return false; - - return LoadFromString(std::move(name), code.value()); -} - -bool PostProcessingShader::LoadFromString(std::string name, std::string code) -{ - m_name = std::move(name); 
- m_code = std::move(code); - m_options.clear(); - LoadOptions(); - return true; -} - bool PostProcessingShader::IsValid() const { - return !m_name.empty() && !m_code.empty(); + return false; } const PostProcessingShader::Option* PostProcessingShader::GetOptionByName(const std::string_view& name) const @@ -141,7 +117,7 @@ const PostProcessingShader::Option* PostProcessingShader::GetOptionByName(const return nullptr; } -FrontendCommon::PostProcessingShader::Option* PostProcessingShader::GetOptionByName(const std::string_view& name) +PostProcessingShader::Option* PostProcessingShader::GetOptionByName(const std::string_view& name) { for (Option& option : m_options) { @@ -237,202 +213,3 @@ void PostProcessingShader::SetConfigString(const std::string_view& str) last_sep = next_sep + 1; } } - -bool PostProcessingShader::UsePushConstants() const -{ - return GetUniformsSize() <= PUSH_CONSTANT_SIZE_THRESHOLD; -} - -u32 PostProcessingShader::GetUniformsSize() const -{ - // lazy packing. todo improve. 
- return sizeof(CommonUniforms) + (sizeof(Option::ValueVector) * static_cast(m_options.size())); -} - -void PostProcessingShader::FillUniformBuffer(void* buffer, u32 texture_width, s32 texture_height, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, - u32 window_width, u32 window_height, s32 original_width, - s32 original_height, float time) const -{ - CommonUniforms* common = static_cast(buffer); - - const float rcp_texture_width = 1.0f / static_cast(texture_width); - const float rcp_texture_height = 1.0f / static_cast(texture_height); - common->src_rect[0] = static_cast(texture_view_x) * rcp_texture_width; - common->src_rect[1] = static_cast(texture_view_y) * rcp_texture_height; - common->src_rect[2] = (static_cast(texture_view_x + texture_view_width - 1)) * rcp_texture_width; - common->src_rect[3] = (static_cast(texture_view_y + texture_view_height - 1)) * rcp_texture_height; - common->src_size[0] = (static_cast(texture_view_width)) * rcp_texture_width; - common->src_size[1] = (static_cast(texture_view_height)) * rcp_texture_height; - common->resolution[0] = static_cast(texture_width); - common->resolution[1] = static_cast(texture_height); - common->rcp_resolution[0] = rcp_texture_width; - common->rcp_resolution[1] = rcp_texture_height; - common->window_resolution[0] = static_cast(window_width); - common->window_resolution[1] = static_cast(window_height); - common->rcp_window_resolution[0] = 1.0f / static_cast(window_width); - common->rcp_window_resolution[1] = 1.0f / static_cast(window_height); - - // pad the "original size" relative to the positioning on the screen - const float view_scale_x = static_cast(original_width) / static_cast(texture_view_width); - const float view_scale_y = static_cast(original_height) / static_cast(texture_view_height); - const s32 view_pad_x = texture_view_x + (texture_width - texture_view_width - texture_view_x); - const s32 view_pad_y = texture_view_y + (texture_height - 
texture_view_height - texture_view_y); - common->original_size[0] = static_cast(original_width); - common->original_size[1] = static_cast(original_height); - common->padded_original_size[0] = common->original_size[0] + static_cast(view_pad_x) * view_scale_x; - common->padded_original_size[1] = common->original_size[1] + static_cast(view_pad_y) * view_scale_y; - - common->time = time; - - u8* option_values = reinterpret_cast(common + 1); - for (const Option& option : m_options) - { - std::memcpy(option_values, option.value.data(), sizeof(Option::ValueVector)); - option_values += sizeof(Option::ValueVector); - } -} - -FrontendCommon::PostProcessingShader& PostProcessingShader::operator=(const PostProcessingShader& copy) -{ - m_name = copy.m_name; - m_code = copy.m_code; - m_options = copy.m_options; - return *this; -} - -FrontendCommon::PostProcessingShader& PostProcessingShader::operator=(PostProcessingShader& move) -{ - m_name = std::move(move.m_name); - m_code = std::move(move.m_code); - m_options = std::move(move.m_options); - return *this; -} - -void PostProcessingShader::LoadOptions() -{ - // Adapted from Dolphin's PostProcessingConfiguration::LoadOptions(). - constexpr char config_start_delimiter[] = "[configuration]"; - constexpr char config_end_delimiter[] = "[/configuration]"; - size_t configuration_start = m_code.find(config_start_delimiter); - size_t configuration_end = m_code.find(config_end_delimiter); - if (configuration_start == std::string::npos || configuration_end == std::string::npos) - { - // Issue loading configuration or there isn't one. 
- return; - } - - std::string configuration_string = - m_code.substr(configuration_start + std::strlen(config_start_delimiter), - configuration_end - configuration_start - std::strlen(config_start_delimiter)); - - std::istringstream in(configuration_string); - - Option current_option = {}; - while (!in.eof()) - { - std::string line_str; - if (std::getline(in, line_str)) - { - std::string_view line_view = line_str; - - // Check for CRLF eol and convert it to LF - if (!line_view.empty() && line_view.at(line_view.size() - 1) == '\r') - line_view.remove_suffix(1); - - if (line_view.empty()) - continue; - - if (line_view[0] == '[') - { - size_t endpos = line_view.find("]"); - if (endpos != std::string::npos) - { - if (current_option.type != Option::Type::Invalid) - { - current_option.value = current_option.default_value; - if (current_option.ui_name.empty()) - current_option.ui_name = current_option.name; - - if (!current_option.name.empty() && current_option.vector_size > 0) - m_options.push_back(std::move(current_option)); - - current_option = {}; - } - - // New section! 
- std::string_view sub = line_view.substr(1, endpos - 1); - if (sub == "OptionBool") - current_option.type = Option::Type::Bool; - else if (sub == "OptionRangeFloat") - current_option.type = Option::Type::Float; - else if (sub == "OptionRangeInteger") - current_option.type = Option::Type::Int; - else - Log_ErrorPrintf("Invalid option type: '%s'", line_str.c_str()); - - continue; - } - } - - if (current_option.type == Option::Type::Invalid) - continue; - - std::string_view key, value; - ParseKeyValue(line_view, &key, &value); - if (!key.empty() && !value.empty()) - { - if (key == "GUIName") - { - current_option.ui_name = value; - } - else if (key == "OptionName") - { - current_option.name = value; - } - else if (key == "DependentOption") - { - current_option.dependent_option = value; - } - else if (key == "MinValue" || key == "MaxValue" || key == "DefaultValue" || key == "StepAmount") - { - Option::ValueVector* dst_array; - if (key == "MinValue") - dst_array = ¤t_option.min_value; - else if (key == "MaxValue") - dst_array = ¤t_option.max_value; - else if (key == "DefaultValue") - dst_array = ¤t_option.default_value; - else // if (key == "StepAmount") - dst_array = ¤t_option.step_value; - - u32 size = 0; - if (current_option.type == Option::Type::Bool) - (*dst_array)[size++].int_value = StringUtil::FromChars(value).value_or(false) ? 1 : 0; - else if (current_option.type == Option::Type::Float) - size = ParseVector(value, dst_array); - else if (current_option.type == Option::Type::Int) - size = ParseVector(value, dst_array); - - current_option.vector_size = - (current_option.vector_size == 0) ? 
size : std::min(current_option.vector_size, size); - } - else - { - Log_ErrorPrintf("Invalid option key: '%s'", line_str.c_str()); - } - } - } - } - - if (current_option.type != Option::Type::Invalid && !current_option.name.empty() && current_option.vector_size > 0) - { - current_option.value = current_option.default_value; - if (current_option.ui_name.empty()) - current_option.ui_name = current_option.name; - - m_options.push_back(std::move(current_option)); - } -} - -} // namespace FrontendCommon diff --git a/src/util/postprocessing_shader.h b/src/util/postprocessing_shader.h index bf8a27cac..6833b21c6 100644 --- a/src/util/postprocessing_shader.h +++ b/src/util/postprocessing_shader.h @@ -1,24 +1,28 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once + #include "common/rectangle.h" -#include "core/types.h" +#include "common/timer.h" +#include "common/types.h" +#include "gpu_device.h" + #include #include #include #include -namespace FrontendCommon { +class GPUPipeline; +class GPUTexture; + +class PostProcessingChain; class PostProcessingShader { -public: - enum : u32 - { - PUSH_CONSTANT_SIZE_THRESHOLD = 128 - }; + friend PostProcessingChain; +public: struct Option { enum : u32 @@ -49,6 +53,8 @@ public: std::string dependent_option; Type type; u32 vector_size; + u32 buffer_size; + u32 buffer_offset; ValueVector default_value; ValueVector min_value; ValueVector max_value; @@ -57,21 +63,15 @@ public: }; PostProcessingShader(); - PostProcessingShader(std::string name, std::string code); - PostProcessingShader(const PostProcessingShader& copy); - PostProcessingShader(PostProcessingShader& move); - ~PostProcessingShader(); - - PostProcessingShader& operator=(const PostProcessingShader& copy); - PostProcessingShader& operator=(PostProcessingShader& move); + PostProcessingShader(std::string name); + virtual ~PostProcessingShader(); 
ALWAYS_INLINE const std::string& GetName() const { return m_name; } - ALWAYS_INLINE const std::string& GetCode() const { return m_code; } ALWAYS_INLINE const std::vector @@ -66,19 +118,28 @@ - - + + + + + + + + + + + - + + true + + + true + + + + - - - - true - - - true - @@ -86,25 +147,56 @@ - + + true + + + true + + + true + + true - + + + true + - + + true + + + true + + + true + + + true + + + true + + + true + + true + @@ -112,6 +204,9 @@ {72f9423c-91ee-4487-aac6-555ed6f61aa1} + + {f351c4d8-594a-4850-b77b-3c1249812cce} + {bb08260f-6fbc-46af-8924-090ee71360c6} @@ -121,6 +216,15 @@ {751d9f62-881c-454e-bce8-cb9cf5f1d22f} + + {43540154-9e1e-409c-834f-b84be5621388} + + + {7f909e29-4808-4bd9-a60c-56c51a3aaec2} + + + {73ee0c55-6ffe-44e7-9c12-baa52434a797} + {ee054e08-3799-4a59-a422-18259c105ffd} diff --git a/src/util/util.vcxproj.filters b/src/util/util.vcxproj.filters index 5b3895d8f..7a76f2d01 100644 --- a/src/util/util.vcxproj.filters +++ b/src/util/util.vcxproj.filters @@ -16,19 +16,9 @@ - - - - - - - - - - @@ -40,6 +30,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + gl + + + gl + + + + + @@ -66,21 +94,11 @@ - - - - - - - - - - @@ -90,5 +108,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + gl + + + gl + + + + + - \ No newline at end of file + + + {e637fc5b-2483-4a31-abc3-89a16d45c223} + + + diff --git a/src/util/vulkan_builders.cpp b/src/util/vulkan_builders.cpp new file mode 100644 index 000000000..ce7d1a757 --- /dev/null +++ b/src/util/vulkan_builders.cpp @@ -0,0 +1,1077 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "vulkan_builders.h" + +#include "common/assert.h" +#include "common/log.h" + +#include + +void Vulkan::AddPointerToChain(void* head, const void* ptr) +{ + VkBaseInStructure* last_st = static_cast(head); + while (last_st->pNext) + { + if (last_st->pNext == ptr) + return; + + last_st = const_cast(last_st->pNext); + } + + last_st->pNext = 
static_cast(ptr); +} + +const char* Vulkan::VkResultToString(VkResult res) +{ + switch (res) + { + case VK_SUCCESS: + return "VK_SUCCESS"; + + case VK_NOT_READY: + return "VK_NOT_READY"; + + case VK_TIMEOUT: + return "VK_TIMEOUT"; + + case VK_EVENT_SET: + return "VK_EVENT_SET"; + + case VK_EVENT_RESET: + return "VK_EVENT_RESET"; + + case VK_INCOMPLETE: + return "VK_INCOMPLETE"; + + case VK_ERROR_OUT_OF_HOST_MEMORY: + return "VK_ERROR_OUT_OF_HOST_MEMORY"; + + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; + + case VK_ERROR_INITIALIZATION_FAILED: + return "VK_ERROR_INITIALIZATION_FAILED"; + + case VK_ERROR_DEVICE_LOST: + return "VK_ERROR_DEVICE_LOST"; + + case VK_ERROR_MEMORY_MAP_FAILED: + return "VK_ERROR_MEMORY_MAP_FAILED"; + + case VK_ERROR_LAYER_NOT_PRESENT: + return "VK_ERROR_LAYER_NOT_PRESENT"; + + case VK_ERROR_EXTENSION_NOT_PRESENT: + return "VK_ERROR_EXTENSION_NOT_PRESENT"; + + case VK_ERROR_FEATURE_NOT_PRESENT: + return "VK_ERROR_FEATURE_NOT_PRESENT"; + + case VK_ERROR_INCOMPATIBLE_DRIVER: + return "VK_ERROR_INCOMPATIBLE_DRIVER"; + + case VK_ERROR_TOO_MANY_OBJECTS: + return "VK_ERROR_TOO_MANY_OBJECTS"; + + case VK_ERROR_FORMAT_NOT_SUPPORTED: + return "VK_ERROR_FORMAT_NOT_SUPPORTED"; + + case VK_ERROR_SURFACE_LOST_KHR: + return "VK_ERROR_SURFACE_LOST_KHR"; + + case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: + return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"; + + case VK_SUBOPTIMAL_KHR: + return "VK_SUBOPTIMAL_KHR"; + + case VK_ERROR_OUT_OF_DATE_KHR: + return "VK_ERROR_OUT_OF_DATE_KHR"; + + case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: + return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"; + + case VK_ERROR_VALIDATION_FAILED_EXT: + return "VK_ERROR_VALIDATION_FAILED_EXT"; + + case VK_ERROR_INVALID_SHADER_NV: + return "VK_ERROR_INVALID_SHADER_NV"; + + default: + return "UNKNOWN_VK_RESULT"; + } +} + +void Vulkan::LogVulkanResult(const char* func_name, VkResult res, const char* msg, ...) 
+{ + std::va_list ap; + va_start(ap, msg); + std::string real_msg = StringUtil::StdStringFromFormatV(msg, ap); + va_end(ap); + + Log::Writef("VulkanDevice", func_name, LOGLEVEL_ERROR, "%s (%d: %s)", real_msg.c_str(), static_cast(res), + VkResultToString(res)); +} + +Vulkan::DescriptorSetLayoutBuilder::DescriptorSetLayoutBuilder() +{ + Clear(); +} + +void Vulkan::DescriptorSetLayoutBuilder::Clear() +{ + m_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + m_ci.pNext = nullptr; + m_ci.flags = 0; + m_ci.pBindings = nullptr; + m_ci.bindingCount = 0; +} + +void Vulkan::DescriptorSetLayoutBuilder::SetPushFlag() +{ + m_ci.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; +} + +VkDescriptorSetLayout Vulkan::DescriptorSetLayoutBuilder::Create(VkDevice device) +{ + VkDescriptorSetLayout layout; + VkResult res = vkCreateDescriptorSetLayout(device, &m_ci, nullptr, &layout); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorSetLayout() failed: "); + return VK_NULL_HANDLE; + } + + Clear(); + return layout; +} + +void Vulkan::DescriptorSetLayoutBuilder::AddBinding(u32 binding, VkDescriptorType dtype, u32 dcount, + VkShaderStageFlags stages) +{ + DebugAssert(m_ci.bindingCount < MAX_BINDINGS); + + VkDescriptorSetLayoutBinding& b = m_bindings[m_ci.bindingCount]; + b.binding = binding; + b.descriptorType = dtype; + b.descriptorCount = dcount; + b.stageFlags = stages; + b.pImmutableSamplers = nullptr; + + m_ci.pBindings = m_bindings.data(); + m_ci.bindingCount++; +} + +Vulkan::PipelineLayoutBuilder::PipelineLayoutBuilder() +{ + Clear(); +} + +void Vulkan::PipelineLayoutBuilder::Clear() +{ + m_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + m_ci.pNext = nullptr; + m_ci.flags = 0; + m_ci.pSetLayouts = nullptr; + m_ci.setLayoutCount = 0; + m_ci.pPushConstantRanges = nullptr; + m_ci.pushConstantRangeCount = 0; +} + +VkPipelineLayout Vulkan::PipelineLayoutBuilder::Create(VkDevice device) +{ + VkPipelineLayout layout; + 
VkResult res = vkCreatePipelineLayout(device, &m_ci, nullptr, &layout); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreatePipelineLayout() failed: "); + return VK_NULL_HANDLE; + } + + Clear(); + return layout; +} + +void Vulkan::PipelineLayoutBuilder::AddDescriptorSet(VkDescriptorSetLayout layout) +{ + DebugAssert(m_ci.setLayoutCount < MAX_SETS); + + m_sets[m_ci.setLayoutCount] = layout; + + m_ci.setLayoutCount++; + m_ci.pSetLayouts = m_sets.data(); +} + +void Vulkan::PipelineLayoutBuilder::AddPushConstants(VkShaderStageFlags stages, u32 offset, u32 size) +{ + DebugAssert(m_ci.pushConstantRangeCount < MAX_PUSH_CONSTANTS); + + VkPushConstantRange& r = m_push_constants[m_ci.pushConstantRangeCount]; + r.stageFlags = stages; + r.offset = offset; + r.size = size; + + m_ci.pushConstantRangeCount++; + m_ci.pPushConstantRanges = m_push_constants.data(); +} + +Vulkan::GraphicsPipelineBuilder::GraphicsPipelineBuilder() +{ + Clear(); +} + +void Vulkan::GraphicsPipelineBuilder::Clear() +{ + m_ci = {}; + m_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + + m_shader_stages = {}; + + m_vertex_input_state = {}; + m_vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + m_ci.pVertexInputState = &m_vertex_input_state; + m_vertex_attributes = {}; + m_vertex_buffers = {}; + + m_input_assembly = {}; + m_input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + + m_rasterization_state = {}; + m_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + m_rasterization_state.lineWidth = 1.0f; + m_depth_state = {}; + m_depth_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + m_blend_state = {}; + m_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + m_blend_attachments = {}; + + m_viewport_state = {}; + m_viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + m_viewport = {}; + m_scissor = {}; + + 
m_dynamic_state = {}; + m_dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + m_dynamic_state_values = {}; + + m_multisample_state = {}; + m_multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + + m_provoking_vertex = {}; + m_provoking_vertex.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT; + + m_line_rasterization_state = {}; + m_line_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT; + + // set defaults + SetNoCullRasterizationState(); + SetNoDepthTestState(); + SetNoBlendingState(); + SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); + + // have to be specified even if dynamic + SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); + SetScissorRect(0, 0, 1, 1); + SetMultisamples(VK_SAMPLE_COUNT_1_BIT); +} + +VkPipeline Vulkan::GraphicsPipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, + bool clear /* = true */) +{ + VkPipeline pipeline; + VkResult res = vkCreateGraphicsPipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines() failed: "); + return VK_NULL_HANDLE; + } + + if (clear) + Clear(); + + return pipeline; +} + +void Vulkan::GraphicsPipelineBuilder::SetShaderStage(VkShaderStageFlagBits stage, VkShaderModule module, + const char* entry_point) +{ + DebugAssert(m_ci.stageCount < MAX_SHADER_STAGES); + + u32 index = 0; + for (; index < m_ci.stageCount; index++) + { + if (m_shader_stages[index].stage == stage) + break; + } + if (index == m_ci.stageCount) + { + m_ci.stageCount++; + m_ci.pStages = m_shader_stages.data(); + } + + VkPipelineShaderStageCreateInfo& s = m_shader_stages[index]; + s.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + s.stage = stage; + s.module = module; + s.pName = entry_point; +} + +void Vulkan::GraphicsPipelineBuilder::AddVertexBuffer(u32 binding, u32 stride, + 
VkVertexInputRate input_rate /*= VK_VERTEX_INPUT_RATE_VERTEX*/) +{ + DebugAssert(m_vertex_input_state.vertexAttributeDescriptionCount < MAX_VERTEX_BUFFERS); + + VkVertexInputBindingDescription& b = m_vertex_buffers[m_vertex_input_state.vertexBindingDescriptionCount]; + b.binding = binding; + b.stride = stride; + b.inputRate = input_rate; + + m_vertex_input_state.vertexBindingDescriptionCount++; + m_vertex_input_state.pVertexBindingDescriptions = m_vertex_buffers.data(); + m_ci.pVertexInputState = &m_vertex_input_state; +} + +void Vulkan::GraphicsPipelineBuilder::AddVertexAttribute(u32 location, u32 binding, VkFormat format, u32 offset) +{ + DebugAssert(m_vertex_input_state.vertexAttributeDescriptionCount < MAX_VERTEX_BUFFERS); + + VkVertexInputAttributeDescription& a = m_vertex_attributes[m_vertex_input_state.vertexAttributeDescriptionCount]; + a.location = location; + a.binding = binding; + a.format = format; + a.offset = offset; + + m_vertex_input_state.vertexAttributeDescriptionCount++; + m_vertex_input_state.pVertexAttributeDescriptions = m_vertex_attributes.data(); + m_ci.pVertexInputState = &m_vertex_input_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetPrimitiveTopology(VkPrimitiveTopology topology, + bool enable_primitive_restart /*= false*/) +{ + m_input_assembly.topology = topology; + m_input_assembly.primitiveRestartEnable = enable_primitive_restart; + + m_ci.pInputAssemblyState = &m_input_assembly; +} + +void Vulkan::GraphicsPipelineBuilder::SetRasterizationState(VkPolygonMode polygon_mode, VkCullModeFlags cull_mode, + VkFrontFace front_face) +{ + m_rasterization_state.polygonMode = polygon_mode; + m_rasterization_state.cullMode = cull_mode; + m_rasterization_state.frontFace = front_face; + + m_ci.pRasterizationState = &m_rasterization_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetLineWidth(float width) +{ + m_rasterization_state.lineWidth = width; +} + +void 
Vulkan::GraphicsPipelineBuilder::SetLineRasterizationMode(VkLineRasterizationModeEXT mode) +{ + AddPointerToChain(&m_rasterization_state, &m_line_rasterization_state); + + m_line_rasterization_state.lineRasterizationMode = mode; +} + +void Vulkan::GraphicsPipelineBuilder::SetMultisamples(u32 multisamples, bool per_sample_shading) +{ + m_multisample_state.rasterizationSamples = static_cast(multisamples); + m_multisample_state.sampleShadingEnable = per_sample_shading; + m_multisample_state.minSampleShading = (multisamples > 1) ? 1.0f : 0.0f; + m_ci.pMultisampleState = &m_multisample_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetMultisamples(VkSampleCountFlagBits samples) +{ + m_multisample_state.rasterizationSamples = samples; + m_ci.pMultisampleState = &m_multisample_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetNoCullRasterizationState() +{ + SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); +} + +void Vulkan::GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, VkCompareOp compare_op) +{ + m_depth_state.depthTestEnable = depth_test; + m_depth_state.depthWriteEnable = depth_write; + m_depth_state.depthCompareOp = compare_op; + + m_ci.pDepthStencilState = &m_depth_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetStencilState(bool stencil_test, const VkStencilOpState& front, + const VkStencilOpState& back) +{ + m_depth_state.stencilTestEnable = stencil_test; + m_depth_state.front = front; + m_depth_state.back = back; +} + +void Vulkan::GraphicsPipelineBuilder::SetNoStencilState() +{ + m_depth_state.stencilTestEnable = VK_FALSE; + m_depth_state.front = {}; + m_depth_state.back = {}; +} + +void Vulkan::GraphicsPipelineBuilder::SetNoDepthTestState() +{ + SetDepthState(false, false, VK_COMPARE_OP_ALWAYS); +} + +void Vulkan::GraphicsPipelineBuilder::SetBlendConstants(float r, float g, float b, float a) +{ + m_blend_state.blendConstants[0] = r; + m_blend_state.blendConstants[1] = g; + 
m_blend_state.blendConstants[2] = b; + m_blend_state.blendConstants[3] = a; + m_ci.pColorBlendState = &m_blend_state; +} + +void Vulkan::GraphicsPipelineBuilder::AddBlendAttachment( + bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, VkBlendOp op, + VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op, VkColorComponentFlags write_mask /* = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT */) +{ + DebugAssert(m_blend_state.attachmentCount < MAX_ATTACHMENTS); + + VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[m_blend_state.attachmentCount]; + bs.blendEnable = blend_enable; + bs.srcColorBlendFactor = src_factor; + bs.dstColorBlendFactor = dst_factor; + bs.colorBlendOp = op; + bs.srcAlphaBlendFactor = alpha_src_factor; + bs.dstAlphaBlendFactor = alpha_dst_factor; + bs.alphaBlendOp = alpha_op; + bs.colorWriteMask = write_mask; + + m_blend_state.attachmentCount++; + m_blend_state.pAttachments = m_blend_attachments.data(); + m_ci.pColorBlendState = &m_blend_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetBlendAttachment( + u32 attachment, bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, VkBlendOp op, + VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op, VkColorComponentFlags write_mask /*= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT*/) +{ + DebugAssert(attachment < MAX_ATTACHMENTS); + + VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[attachment]; + bs.blendEnable = blend_enable; + bs.srcColorBlendFactor = src_factor; + bs.dstColorBlendFactor = dst_factor; + bs.colorBlendOp = op; + bs.srcAlphaBlendFactor = alpha_src_factor; + bs.dstAlphaBlendFactor = alpha_dst_factor; + bs.alphaBlendOp = alpha_op; + bs.colorWriteMask = write_mask; + + if (attachment >= m_blend_state.attachmentCount) + { + m_blend_state.attachmentCount 
= attachment + 1u; + m_blend_state.pAttachments = m_blend_attachments.data(); + m_ci.pColorBlendState = &m_blend_state; + } +} + +void Vulkan::GraphicsPipelineBuilder::SetColorWriteMask(u32 attachment, VkColorComponentFlags write_mask) +{ + DebugAssert(attachment < MAX_ATTACHMENTS); + + VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[attachment]; + bs.colorWriteMask = write_mask; +} + +void Vulkan::GraphicsPipelineBuilder::AddBlendFlags(u32 flags) +{ + m_blend_state.flags |= flags; +} + +void Vulkan::GraphicsPipelineBuilder::ClearBlendAttachments() +{ + m_blend_attachments = {}; + m_blend_state.attachmentCount = 0; +} + +void Vulkan::GraphicsPipelineBuilder::SetNoBlendingState() +{ + ClearBlendAttachments(); + SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT); +} + +void Vulkan::GraphicsPipelineBuilder::AddDynamicState(VkDynamicState state) +{ + DebugAssert(m_dynamic_state.dynamicStateCount < MAX_DYNAMIC_STATE); + + m_dynamic_state_values[m_dynamic_state.dynamicStateCount] = state; + m_dynamic_state.dynamicStateCount++; + m_dynamic_state.pDynamicStates = m_dynamic_state_values.data(); + m_ci.pDynamicState = &m_dynamic_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetDynamicViewportAndScissorState() +{ + AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT); + AddDynamicState(VK_DYNAMIC_STATE_SCISSOR); +} + +void Vulkan::GraphicsPipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, + float max_depth) +{ + m_viewport.x = x; + m_viewport.y = y; + m_viewport.width = width; + m_viewport.height = height; + m_viewport.minDepth = min_depth; + m_viewport.maxDepth = max_depth; + + m_viewport_state.pViewports = &m_viewport; + m_viewport_state.viewportCount = 1u; + m_ci.pViewportState = &m_viewport_state; +} + +void 
Vulkan::GraphicsPipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) +{ + m_scissor.offset.x = x; + m_scissor.offset.y = y; + m_scissor.extent.width = width; + m_scissor.extent.height = height; + + m_viewport_state.pScissors = &m_scissor; + m_viewport_state.scissorCount = 1u; + m_ci.pViewportState = &m_viewport_state; +} + +void Vulkan::GraphicsPipelineBuilder::SetPipelineLayout(VkPipelineLayout layout) +{ + m_ci.layout = layout; +} + +void Vulkan::GraphicsPipelineBuilder::SetRenderPass(VkRenderPass render_pass, u32 subpass) +{ + m_ci.renderPass = render_pass; + m_ci.subpass = subpass; +} + +void Vulkan::GraphicsPipelineBuilder::SetProvokingVertex(VkProvokingVertexModeEXT mode) +{ + AddPointerToChain(&m_rasterization_state, &m_provoking_vertex); + + m_provoking_vertex.provokingVertexMode = mode; +} + +Vulkan::ComputePipelineBuilder::ComputePipelineBuilder() +{ + Clear(); +} + +void Vulkan::ComputePipelineBuilder::Clear() +{ + m_ci = {}; + m_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + m_si = {}; + m_smap_entries = {}; + m_smap_constants = {}; +} + +VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/, + bool clear /*= true*/) +{ + VkPipeline pipeline; + VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: "); + return VK_NULL_HANDLE; + } + + if (clear) + Clear(); + + return pipeline; +} + +void Vulkan::ComputePipelineBuilder::SetShader(VkShaderModule module, const char* entry_point) +{ + m_ci.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + m_ci.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + m_ci.stage.module = module; + m_ci.stage.pName = entry_point; +} + +void Vulkan::ComputePipelineBuilder::SetPipelineLayout(VkPipelineLayout layout) +{ + m_ci.layout = layout; +} + +void 
Vulkan::ComputePipelineBuilder::SetSpecializationBool(u32 index, bool value) +{ + const u32 u32_value = static_cast(value); + SetSpecializationValue(index, u32_value); +} + +void Vulkan::ComputePipelineBuilder::SetSpecializationValue(u32 index, u32 value) +{ + if (m_si.mapEntryCount == 0) + { + m_si.pMapEntries = m_smap_entries.data(); + m_si.pData = m_smap_constants.data(); + m_ci.stage.pSpecializationInfo = &m_si; + } + + m_smap_entries[m_si.mapEntryCount++] = {index, index * SPECIALIZATION_CONSTANT_SIZE, SPECIALIZATION_CONSTANT_SIZE}; + m_si.dataSize += SPECIALIZATION_CONSTANT_SIZE; +} + +Vulkan::SamplerBuilder::SamplerBuilder() +{ + Clear(); +} + +void Vulkan::SamplerBuilder::Clear() +{ + m_ci = {}; + m_ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; +} + +VkSampler Vulkan::SamplerBuilder::Create(VkDevice device, bool clear /* = true */) +{ + VkSampler sampler; + VkResult res = vkCreateSampler(device, &m_ci, nullptr, &sampler); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSampler() failed: "); + return VK_NULL_HANDLE; + } + + return sampler; +} + +void Vulkan::SamplerBuilder::SetFilter(VkFilter mag_filter, VkFilter min_filter, VkSamplerMipmapMode mip_filter) +{ + m_ci.magFilter = mag_filter; + m_ci.minFilter = min_filter; + m_ci.mipmapMode = mip_filter; +} + +void Vulkan::SamplerBuilder::SetAddressMode(VkSamplerAddressMode u, VkSamplerAddressMode v, VkSamplerAddressMode w) +{ + m_ci.addressModeU = u; + m_ci.addressModeV = v; + m_ci.addressModeW = w; +} + +void Vulkan::SamplerBuilder::SetPointSampler( + VkSamplerAddressMode address_mode /* = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER */) +{ + Clear(); + SetFilter(VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST); + SetAddressMode(address_mode, address_mode, address_mode); +} + +void Vulkan::SamplerBuilder::SetLinearSampler( + bool mipmaps, VkSamplerAddressMode address_mode /* = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER */) +{ + Clear(); + SetFilter(VK_FILTER_LINEAR, 
VK_FILTER_LINEAR, + mipmaps ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST); + SetAddressMode(address_mode, address_mode, address_mode); + + if (mipmaps) + { + m_ci.minLod = std::numeric_limits::min(); + m_ci.maxLod = std::numeric_limits::max(); + } +} + +Vulkan::DescriptorSetUpdateBuilder::DescriptorSetUpdateBuilder() +{ + Clear(); +} + +void Vulkan::DescriptorSetUpdateBuilder::Clear() +{ + m_writes = {}; + m_num_writes = 0; +} + +void Vulkan::DescriptorSetUpdateBuilder::Update(VkDevice device, bool clear /*= true*/) +{ + DebugAssert(m_num_writes > 0); + + vkUpdateDescriptorSets(device, m_num_writes, (m_num_writes > 0) ? m_writes.data() : nullptr, 0, nullptr); + + if (clear) + Clear(); +} + +void Vulkan::DescriptorSetUpdateBuilder::PushUpdate(VkCommandBuffer cmdbuf, VkPipelineBindPoint bind_point, + VkPipelineLayout layout, u32 set, bool clear /*= true*/) +{ + DebugAssert(m_num_writes > 0); + + vkCmdPushDescriptorSetKHR(cmdbuf, bind_point, layout, set, m_num_writes, m_writes.data()); + + if (clear) + Clear(); +} + +void Vulkan::DescriptorSetUpdateBuilder::AddImageDescriptorWrite( + VkDescriptorSet set, u32 binding, VkImageView view, + VkImageLayout layout /*= VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/) +{ + DebugAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS); + + VkDescriptorImageInfo& ii = m_image_infos[m_num_image_infos++]; + ii.imageView = view; + ii.imageLayout = layout; + ii.sampler = VK_NULL_HANDLE; + + VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; + dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + dw.dstSet = set; + dw.dstBinding = binding; + dw.descriptorCount = 1; + dw.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + dw.pImageInfo = ⅈ +} + +void Vulkan::DescriptorSetUpdateBuilder::AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler) +{ + DebugAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS); + + VkDescriptorImageInfo& ii = 
m_image_infos[m_num_image_infos++]; + ii.imageView = VK_NULL_HANDLE; + ii.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + ii.sampler = sampler; + + VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; + dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + dw.dstSet = set; + dw.dstBinding = binding; + dw.descriptorCount = 1; + dw.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + dw.pImageInfo = ⅈ +} + +void Vulkan::DescriptorSetUpdateBuilder::AddSamplerDescriptorWrites(VkDescriptorSet set, u32 binding, + const VkSampler* samplers, u32 num_samplers) +{ + DebugAssert(m_num_writes < MAX_WRITES && (m_num_image_infos + num_samplers) < MAX_IMAGE_INFOS); + + VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; + dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + dw.dstSet = set; + dw.dstBinding = binding; + dw.descriptorCount = num_samplers; + dw.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + dw.pImageInfo = &m_image_infos[m_num_image_infos]; + + for (u32 i = 0; i < num_samplers; i++) + { + VkDescriptorImageInfo& ii = m_image_infos[m_num_image_infos++]; + ii.imageView = VK_NULL_HANDLE; + ii.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + ii.sampler = samplers[i]; + } +} + +void Vulkan::DescriptorSetUpdateBuilder::AddCombinedImageSamplerDescriptorWrite( + VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler, + VkImageLayout layout /*= VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL*/) +{ + DebugAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS); + + VkDescriptorImageInfo& ii = m_image_infos[m_num_image_infos++]; + ii.imageView = view; + ii.imageLayout = layout; + ii.sampler = sampler; + + VkWriteDescriptorSet& dw = m_writes[m_num_writes++]; + dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + dw.dstSet = set; + dw.dstBinding = binding; + dw.descriptorCount = 1; + dw.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + dw.pImageInfo = ⅈ +} + +void Vulkan::DescriptorSetUpdateBuilder::AddCombinedImageSamplerDescriptorWrites( + 
VkDescriptorSet set, u32 binding, const VkImageView* views, const VkSampler* samplers, u32 num_views,
+  VkImageLayout layout /* = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL */)
+{
+  // One write covers `num_views` consecutive combined image/sampler descriptors; views[i] pairs with samplers[i].
+  // "<=": exactly filling the remaining image-info slots is valid (the check used to reject it).
+  DebugAssert(m_num_writes < MAX_WRITES && (m_num_image_infos + num_views) <= MAX_IMAGE_INFOS);
+
+  VkWriteDescriptorSet& dw = m_writes[m_num_writes++];
+  dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  dw.dstSet = set;
+  dw.dstBinding = binding;
+  dw.descriptorCount = num_views;
+  dw.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+  // Points at the first of the slots filled in by the loop below.
+  dw.pImageInfo = &m_image_infos[m_num_image_infos];
+
+  for (u32 i = 0; i < num_views; i++)
+  {
+    VkDescriptorImageInfo& ii = m_image_infos[m_num_image_infos++];
+    ii.imageView = views[i];
+    ii.sampler = samplers[i];
+    ii.imageLayout = layout;
+  }
+}
+
+// Queues a buffer descriptor write of type `dtype` for `binding`, consuming one buffer-info slot and one
+// write slot.
+void Vulkan::DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding,
+                                                                  VkDescriptorType dtype, VkBuffer buffer, u32 offset,
+                                                                  u32 size)
+{
+  DebugAssert(m_num_writes < MAX_WRITES && m_num_buffer_infos < MAX_BUFFER_INFOS);
+
+  VkDescriptorBufferInfo& bi = m_buffer_infos[m_num_buffer_infos++];
+  bi.buffer = buffer;
+  bi.offset = offset;
+  bi.range = size;
+
+  VkWriteDescriptorSet& dw = m_writes[m_num_writes++];
+  dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  dw.dstSet = set;
+  dw.dstBinding = binding;
+  dw.descriptorCount = 1;
+  dw.descriptorType = dtype;
+  dw.pBufferInfo = &bi;
+}
+
+// Queues a texel buffer view write of type `dtype` for `binding`, consuming one view slot and one write slot.
+void Vulkan::DescriptorSetUpdateBuilder::AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding,
+                                                                      VkDescriptorType dtype, VkBufferView view)
+{
+  DebugAssert(m_num_writes < MAX_WRITES && m_num_views < MAX_VIEWS);
+
+  // The handle is copied into builder-owned storage so the write can point at it.
+  VkBufferView& bi = m_views[m_num_views++];
+  bi = view;
+
+  VkWriteDescriptorSet& dw = m_writes[m_num_writes++];
+  dw.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  dw.dstSet = set;
+  dw.dstBinding = binding;
+  dw.descriptorCount = 1;
+  dw.descriptorType = dtype;
+  dw.pTexelBufferView = &bi;
+}
+
+void 
Vulkan::DescriptorSetUpdateBuilder::AddInputAttachmentDescriptorWrite(
+  VkDescriptorSet set, u32 binding, VkImageView view, VkImageLayout layout /*= VK_IMAGE_LAYOUT_GENERAL*/)
+{
+  DebugAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS);
+
+  // Claim the next image-info slot and describe the attachment (no sampler involved).
+  VkDescriptorImageInfo& image_info = m_image_infos[m_num_image_infos++];
+  image_info.imageView = view;
+  image_info.imageLayout = layout;
+  image_info.sampler = VK_NULL_HANDLE;
+
+  // Claim the next write slot and point it at the slot filled in above.
+  VkWriteDescriptorSet& write = m_writes[m_num_writes++];
+  write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  write.dstSet = set;
+  write.dstBinding = binding;
+  write.descriptorCount = 1;
+  write.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
+  write.pImageInfo = &image_info;
+}
+
+// Queues a storage-image write for `binding`, consuming one image-info slot and one write slot.
+void Vulkan::DescriptorSetUpdateBuilder::AddStorageImageDescriptorWrite(
+  VkDescriptorSet set, u32 binding, VkImageView view, VkImageLayout layout /*= VK_IMAGE_LAYOUT_GENERAL*/)
+{
+  DebugAssert(m_num_writes < MAX_WRITES && m_num_image_infos < MAX_IMAGE_INFOS);
+
+  VkDescriptorImageInfo& image_info = m_image_infos[m_num_image_infos++];
+  image_info.imageView = view;
+  image_info.imageLayout = layout;
+  image_info.sampler = VK_NULL_HANDLE;
+
+  VkWriteDescriptorSet& write = m_writes[m_num_writes++];
+  write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+  write.dstSet = set;
+  write.dstBinding = binding;
+  write.descriptorCount = 1;
+  write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+  write.pImageInfo = &image_info;
+}
+
+// A freshly constructed builder starts from the cleared state.
+Vulkan::FramebufferBuilder::FramebufferBuilder()
+{
+  Clear();
+}
+
+// Zeroes the attachment list and the create info, then re-tags the create info with its sType.
+void Vulkan::FramebufferBuilder::Clear()
+{
+  m_images = {};
+
+  m_ci = {};
+  m_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
+}
+
+// Creates a framebuffer from the accumulated state.
+// Returns VK_NULL_HANDLE (after logging) on failure; the builder is only cleared on success when `clear` is set.
+VkFramebuffer Vulkan::FramebufferBuilder::Create(VkDevice device, bool clear /*= true*/)
+{
+  VkFramebuffer handle;
+  const VkResult result = vkCreateFramebuffer(device, &m_ci, nullptr, &handle);
+  if (result == VK_SUCCESS)
+  {
+    if (clear)
+      Clear();
+
+    return handle;
+  }
+
+  LOG_VULKAN_ERROR(result, "vkCreateFramebuffer() failed: ");
+  return VK_NULL_HANDLE;
+}
+
+void 
Vulkan::FramebufferBuilder::AddAttachment(VkImageView image)
+{
+  DebugAssert(m_ci.attachmentCount < MAX_ATTACHMENTS);
+
+  // Append the view to builder-owned storage and keep the create info pointing at it.
+  m_images[m_ci.attachmentCount] = image;
+
+  m_ci.attachmentCount++;
+  m_ci.pAttachments = m_images.data();
+}
+
+// Sets the framebuffer dimensions and layer count.
+void Vulkan::FramebufferBuilder::SetSize(u32 width, u32 height, u32 layers)
+{
+  m_ci.width = width;
+  m_ci.height = height;
+  m_ci.layers = layers;
+}
+
+// Sets the render pass the framebuffer is created against.
+void Vulkan::FramebufferBuilder::SetRenderPass(VkRenderPass render_pass)
+{
+  m_ci.renderPass = render_pass;
+}
+
+// A freshly constructed builder starts from the cleared state.
+Vulkan::RenderPassBuilder::RenderPassBuilder()
+{
+  Clear();
+}
+
+// Zeroes the create info, attachments, attachment references and subpasses.
+void Vulkan::RenderPassBuilder::Clear()
+{
+  m_ci = {};
+  m_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
+  m_attachments = {};
+  m_attachment_references = {};
+  m_num_attachment_references = 0;
+  m_subpasses = {};
+}
+
+// Creates a render pass from the accumulated state.
+// Returns VK_NULL_HANDLE (after logging) on failure. On success the builder is reset when `clear` is set,
+// matching ComputePipelineBuilder::Create() and FramebufferBuilder::Create(); the flag used to be ignored.
+VkRenderPass Vulkan::RenderPassBuilder::Create(VkDevice device, bool clear /*= true*/)
+{
+  VkRenderPass rp;
+  VkResult res = vkCreateRenderPass(device, &m_ci, nullptr, &rp);
+  if (res != VK_SUCCESS)
+  {
+    LOG_VULKAN_ERROR(res, "vkCreateRenderPass() failed: ");
+    return VK_NULL_HANDLE;
+  }
+
+  if (clear)
+    Clear();
+
+  return rp;
+}
+
+// Appends an attachment description and returns its index.
+// Stencil load/store ops are always DONT_CARE.
+u32 Vulkan::RenderPassBuilder::AddAttachment(VkFormat format, VkSampleCountFlagBits samples, VkAttachmentLoadOp load_op,
+                                             VkAttachmentStoreOp store_op, VkImageLayout initial_layout,
+                                             VkImageLayout final_layout)
+{
+  DebugAssert(m_ci.attachmentCount < MAX_ATTACHMENTS);
+
+  const u32 index = m_ci.attachmentCount;
+  VkAttachmentDescription& ad = m_attachments[index];
+  ad.format = format;
+  ad.samples = samples;
+  ad.loadOp = load_op;
+  ad.storeOp = store_op;
+  ad.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+  ad.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+  ad.initialLayout = initial_layout;
+  ad.finalLayout = final_layout;
+
+  m_ci.attachmentCount++;
+  m_ci.pAttachments = m_attachments.data();
+
+  return index;
+}
+
+u32 Vulkan::RenderPassBuilder::AddSubpass()
+{
+  DebugAssert(m_ci.subpassCount < MAX_SUBPASSES);
+
+  const u32 index = m_ci.subpassCount;
+  
VkSubpassDescription& sp = m_subpasses[index]; + sp.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + + m_ci.subpassCount++; + m_ci.pSubpasses = m_subpasses.data(); + + return index; +} + +void Vulkan::RenderPassBuilder::AddSubpassColorAttachment(u32 subpass, u32 attachment, VkImageLayout layout) +{ + DebugAssert(subpass < m_ci.subpassCount && m_num_attachment_references < MAX_ATTACHMENT_REFERENCES); + + VkAttachmentReference& ar = m_attachment_references[m_num_attachment_references++]; + ar.attachment = attachment; + ar.layout = layout; + + VkSubpassDescription& sp = m_subpasses[subpass]; + if (sp.colorAttachmentCount == 0) + sp.pColorAttachments = &ar; + sp.colorAttachmentCount++; +} + +void Vulkan::RenderPassBuilder::AddSubpassDepthAttachment(u32 subpass, u32 attachment, VkImageLayout layout) +{ + DebugAssert(subpass < m_ci.subpassCount && m_num_attachment_references < MAX_ATTACHMENT_REFERENCES); + + VkAttachmentReference& ar = m_attachment_references[m_num_attachment_references++]; + ar.attachment = attachment; + ar.layout = layout; + + VkSubpassDescription& sp = m_subpasses[subpass]; + sp.pDepthStencilAttachment = &ar; +} + +Vulkan::BufferViewBuilder::BufferViewBuilder() +{ + Clear(); +} + +void Vulkan::BufferViewBuilder::Clear() +{ + m_ci = {}; + m_ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; +} + +VkBufferView Vulkan::BufferViewBuilder::Create(VkDevice device, bool clear /*= true*/) +{ + VkBufferView bv; + VkResult res = vkCreateBufferView(device, &m_ci, nullptr, &bv); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBufferView() failed: "); + return VK_NULL_HANDLE; + } + + return bv; +} + +void Vulkan::BufferViewBuilder::Set(VkBuffer buffer, VkFormat format, u32 offset, u32 size) +{ + m_ci.buffer = buffer; + m_ci.format = format; + m_ci.offset = offset; + m_ci.range = size; +} diff --git a/src/util/vulkan_builders.h b/src/util/vulkan_builders.h new file mode 100644 index 000000000..ccec95cfd --- /dev/null +++ 
b/src/util/vulkan_builders.h @@ -0,0 +1,416 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "vulkan_loader.h" + +#include "common/string_util.h" + +#include +#include +#include + +#ifdef _DEBUG +#define ENABLE_VULKAN_DEBUG_OBJECTS 1 +#endif + +#define LOG_VULKAN_ERROR(res, ...) ::Vulkan::LogVulkanResult(__func__, res, __VA_ARGS__) + +namespace Vulkan { +// Adds a structure to a chain. +void AddPointerToChain(void* head, const void* ptr); + +const char* VkResultToString(VkResult res); +void LogVulkanResult(const char* func_name, VkResult res, const char* msg, ...); + +class DescriptorSetLayoutBuilder +{ +public: + enum : u32 + { + MAX_BINDINGS = 16, + }; + + DescriptorSetLayoutBuilder(); + + void Clear(); + void SetPushFlag(); + + VkDescriptorSetLayout Create(VkDevice device); + + void AddBinding(u32 binding, VkDescriptorType dtype, u32 dcount, VkShaderStageFlags stages); + +private: + VkDescriptorSetLayoutCreateInfo m_ci{}; + std::array m_bindings{}; +}; + +class PipelineLayoutBuilder +{ +public: + enum : u32 + { + MAX_SETS = 8, + MAX_PUSH_CONSTANTS = 1 + }; + + PipelineLayoutBuilder(); + + void Clear(); + + VkPipelineLayout Create(VkDevice device); + + void AddDescriptorSet(VkDescriptorSetLayout layout); + + void AddPushConstants(VkShaderStageFlags stages, u32 offset, u32 size); + +private: + VkPipelineLayoutCreateInfo m_ci{}; + std::array m_sets{}; + std::array m_push_constants{}; +}; + +class GraphicsPipelineBuilder +{ +public: + enum : u32 + { + MAX_SHADER_STAGES = 3, + MAX_VERTEX_ATTRIBUTES = 16, + MAX_VERTEX_BUFFERS = 8, + MAX_ATTACHMENTS = 2, + MAX_DYNAMIC_STATE = 8 + }; + + GraphicsPipelineBuilder(); + + void Clear(); + + VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true); + + void SetShaderStage(VkShaderStageFlagBits stage, VkShaderModule module, const char* entry_point); + void SetVertexShader(VkShaderModule 
module) { SetShaderStage(VK_SHADER_STAGE_VERTEX_BIT, module, "main"); } + void SetGeometryShader(VkShaderModule module) { SetShaderStage(VK_SHADER_STAGE_GEOMETRY_BIT, module, "main"); } + void SetFragmentShader(VkShaderModule module) { SetShaderStage(VK_SHADER_STAGE_FRAGMENT_BIT, module, "main"); } + + void AddVertexBuffer(u32 binding, u32 stride, VkVertexInputRate input_rate = VK_VERTEX_INPUT_RATE_VERTEX); + void AddVertexAttribute(u32 location, u32 binding, VkFormat format, u32 offset); + + void SetPrimitiveTopology(VkPrimitiveTopology topology, bool enable_primitive_restart = false); + + void SetRasterizationState(VkPolygonMode polygon_mode, VkCullModeFlags cull_mode, VkFrontFace front_face); + void SetLineWidth(float width); + void SetLineRasterizationMode(VkLineRasterizationModeEXT mode); + void SetMultisamples(VkSampleCountFlagBits samples); + void SetMultisamples(u32 multisamples, bool per_sample_shading); + void SetNoCullRasterizationState(); + + void SetDepthState(bool depth_test, bool depth_write, VkCompareOp compare_op); + void SetStencilState(bool stencil_test, const VkStencilOpState& front, const VkStencilOpState& back); + void SetNoDepthTestState(); + void SetNoStencilState(); + + void AddBlendAttachment(bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, VkBlendOp op, + VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op, + VkColorComponentFlags write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT); + void SetBlendAttachment(u32 attachment, bool blend_enable, VkBlendFactor src_factor, VkBlendFactor dst_factor, + VkBlendOp op, VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, + VkBlendOp alpha_op, + VkColorComponentFlags write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT); + void SetColorWriteMask(u32 attachment, + VkColorComponentFlags write_mask = 
VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT); + void AddBlendFlags(u32 flags); + void ClearBlendAttachments(); + + void SetBlendConstants(float r, float g, float b, float a); + void SetNoBlendingState(); + + void AddDynamicState(VkDynamicState state); + + void SetDynamicViewportAndScissorState(); + void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth); + void SetScissorRect(s32 x, s32 y, u32 width, u32 height); + + void SetPipelineLayout(VkPipelineLayout layout); + void SetRenderPass(VkRenderPass render_pass, u32 subpass); + + void SetProvokingVertex(VkProvokingVertexModeEXT mode); + +private: + VkGraphicsPipelineCreateInfo m_ci; + std::array m_shader_stages; + + VkPipelineVertexInputStateCreateInfo m_vertex_input_state; + std::array m_vertex_buffers; + std::array m_vertex_attributes; + + VkPipelineInputAssemblyStateCreateInfo m_input_assembly; + + VkPipelineRasterizationStateCreateInfo m_rasterization_state; + VkPipelineDepthStencilStateCreateInfo m_depth_state; + + VkPipelineColorBlendStateCreateInfo m_blend_state; + std::array m_blend_attachments; + + VkPipelineViewportStateCreateInfo m_viewport_state; + VkViewport m_viewport; + VkRect2D m_scissor; + + VkPipelineDynamicStateCreateInfo m_dynamic_state; + std::array m_dynamic_state_values; + + VkPipelineMultisampleStateCreateInfo m_multisample_state; + + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT m_provoking_vertex; + VkPipelineRasterizationLineStateCreateInfoEXT m_line_rasterization_state; +}; + +class ComputePipelineBuilder +{ +public: + enum : u32 + { + SPECIALIZATION_CONSTANT_SIZE = 4, + MAX_SPECIALIZATION_CONSTANTS = 4, + }; + + ComputePipelineBuilder(); + + void Clear(); + + VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true); + + void SetShader(VkShaderModule module, const char* entry_point); + + void SetPipelineLayout(VkPipelineLayout 
layout); + + void SetSpecializationBool(u32 index, bool value); + +private: + void SetSpecializationValue(u32 index, u32 value); + + VkComputePipelineCreateInfo m_ci; + + VkSpecializationInfo m_si; + std::array m_smap_entries; + std::array m_smap_constants; +}; + +class SamplerBuilder +{ +public: + SamplerBuilder(); + + void Clear(); + + VkSampler Create(VkDevice device, bool clear = true); + + void SetFilter(VkFilter mag_filter, VkFilter min_filter, VkSamplerMipmapMode mip_filter); + void SetAddressMode(VkSamplerAddressMode u, VkSamplerAddressMode v, VkSamplerAddressMode w); + + void SetPointSampler(VkSamplerAddressMode address_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); + void SetLinearSampler(bool mipmaps, VkSamplerAddressMode address_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); + +private: + VkSamplerCreateInfo m_ci; +}; + +class DescriptorSetUpdateBuilder +{ + enum : u32 + { + MAX_WRITES = 16, + MAX_IMAGE_INFOS = 8, + MAX_BUFFER_INFOS = 4, + MAX_VIEWS = 4, + }; + +public: + DescriptorSetUpdateBuilder(); + + void Clear(); + + void Update(VkDevice device, bool clear = true); + void PushUpdate(VkCommandBuffer cmdbuf, VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 set, + bool clear = true); + + void AddImageDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, + VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler); + void AddSamplerDescriptorWrites(VkDescriptorSet set, u32 binding, const VkSampler* samplers, u32 num_samplers); + void AddCombinedImageSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler, + VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + void AddCombinedImageSamplerDescriptorWrites(VkDescriptorSet set, u32 binding, const VkImageView* views, + const VkSampler* samplers, u32 num_views, + VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + void 
AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer, u32 offset, + u32 size); + void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view); + void AddInputAttachmentDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, + VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL); + void AddStorageImageDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, + VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL); + +private: + std::array m_writes; + u32 m_num_writes = 0; + + std::array m_buffer_infos; + std::array m_image_infos; + std::array m_views; + u32 m_num_buffer_infos = 0; + u32 m_num_image_infos = 0; + u32 m_num_views = 0; +}; + +class FramebufferBuilder +{ + enum : u32 + { + MAX_ATTACHMENTS = 2, + }; + +public: + FramebufferBuilder(); + + void Clear(); + + VkFramebuffer Create(VkDevice device, bool clear = true); + + void AddAttachment(VkImageView image); + + void SetSize(u32 width, u32 height, u32 layers); + + void SetRenderPass(VkRenderPass render_pass); + +private: + VkFramebufferCreateInfo m_ci; + std::array m_images; +}; + +class RenderPassBuilder +{ + enum : u32 + { + MAX_ATTACHMENTS = 2, + MAX_ATTACHMENT_REFERENCES = 2, + MAX_SUBPASSES = 1, + }; + +public: + RenderPassBuilder(); + + void Clear(); + + VkRenderPass Create(VkDevice device, bool clear = true); + + u32 AddAttachment(VkFormat format, VkSampleCountFlagBits samples, VkAttachmentLoadOp load_op, + VkAttachmentStoreOp store_op, VkImageLayout initial_layout, VkImageLayout final_layout); + + u32 AddSubpass(); + void AddSubpassColorAttachment(u32 subpass, u32 attachment, VkImageLayout layout); + void AddSubpassDepthAttachment(u32 subpass, u32 attachment, VkImageLayout layout); + +private: + VkRenderPassCreateInfo m_ci; + std::array m_attachments; + std::array m_attachment_references; + u32 m_num_attachment_references = 0; + std::array m_subpasses; +}; + +class BufferViewBuilder +{ +public: + 
BufferViewBuilder(); + + void Clear(); + + VkBufferView Create(VkDevice device, bool clear = true); + + void Set(VkBuffer buffer, VkFormat format, u32 offset, u32 size); + +private: + VkBufferViewCreateInfo m_ci; +}; + +#ifdef ENABLE_VULKAN_DEBUG_OBJECTS + +// Provides a compile-time mapping between a Vulkan-type into its matching VkObjectType +template +struct VkObjectTypeMap; + +// clang-format off + template<> struct VkObjectTypeMap { using type = VkInstance; static constexpr VkObjectType value = VK_OBJECT_TYPE_INSTANCE; }; + template<> struct VkObjectTypeMap { using type = VkPhysicalDevice; static constexpr VkObjectType value = VK_OBJECT_TYPE_PHYSICAL_DEVICE; }; + template<> struct VkObjectTypeMap { using type = VkDevice; static constexpr VkObjectType value = VK_OBJECT_TYPE_DEVICE; }; + template<> struct VkObjectTypeMap { using type = VkQueue; static constexpr VkObjectType value = VK_OBJECT_TYPE_QUEUE; }; + template<> struct VkObjectTypeMap { using type = VkSemaphore; static constexpr VkObjectType value = VK_OBJECT_TYPE_SEMAPHORE; }; + template<> struct VkObjectTypeMap { using type = VkCommandBuffer; static constexpr VkObjectType value = VK_OBJECT_TYPE_COMMAND_BUFFER; }; + template<> struct VkObjectTypeMap { using type = VkFence; static constexpr VkObjectType value = VK_OBJECT_TYPE_FENCE; }; + template<> struct VkObjectTypeMap { using type = VkDeviceMemory; static constexpr VkObjectType value = VK_OBJECT_TYPE_DEVICE_MEMORY; }; + template<> struct VkObjectTypeMap { using type = VkBuffer; static constexpr VkObjectType value = VK_OBJECT_TYPE_BUFFER; }; + template<> struct VkObjectTypeMap { using type = VkImage; static constexpr VkObjectType value = VK_OBJECT_TYPE_IMAGE; }; + template<> struct VkObjectTypeMap { using type = VkEvent; static constexpr VkObjectType value = VK_OBJECT_TYPE_EVENT; }; + template<> struct VkObjectTypeMap { using type = VkQueryPool; static constexpr VkObjectType value = VK_OBJECT_TYPE_QUERY_POOL; }; + template<> struct VkObjectTypeMap { 
using type = VkBufferView; static constexpr VkObjectType value = VK_OBJECT_TYPE_BUFFER_VIEW; }; + template<> struct VkObjectTypeMap { using type = VkImageView; static constexpr VkObjectType value = VK_OBJECT_TYPE_IMAGE_VIEW; }; + template<> struct VkObjectTypeMap { using type = VkShaderModule; static constexpr VkObjectType value = VK_OBJECT_TYPE_SHADER_MODULE; }; + template<> struct VkObjectTypeMap { using type = VkPipelineCache; static constexpr VkObjectType value = VK_OBJECT_TYPE_PIPELINE_CACHE; }; + template<> struct VkObjectTypeMap { using type = VkPipelineLayout; static constexpr VkObjectType value = VK_OBJECT_TYPE_PIPELINE_LAYOUT; }; + template<> struct VkObjectTypeMap { using type = VkRenderPass; static constexpr VkObjectType value = VK_OBJECT_TYPE_RENDER_PASS; }; + template<> struct VkObjectTypeMap { using type = VkPipeline; static constexpr VkObjectType value = VK_OBJECT_TYPE_PIPELINE; }; + template<> struct VkObjectTypeMap { using type = VkDescriptorSetLayout; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT; }; + template<> struct VkObjectTypeMap { using type = VkSampler; static constexpr VkObjectType value = VK_OBJECT_TYPE_SAMPLER; }; + template<> struct VkObjectTypeMap { using type = VkDescriptorPool; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_POOL; }; + template<> struct VkObjectTypeMap { using type = VkDescriptorSet; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_SET; }; + template<> struct VkObjectTypeMap { using type = VkFramebuffer; static constexpr VkObjectType value = VK_OBJECT_TYPE_FRAMEBUFFER; }; + template<> struct VkObjectTypeMap { using type = VkCommandPool; static constexpr VkObjectType value = VK_OBJECT_TYPE_COMMAND_POOL; }; + template<> struct VkObjectTypeMap { using type = VkDescriptorUpdateTemplate; static constexpr VkObjectType value = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE; }; + template<> struct VkObjectTypeMap { using type = VkSurfaceKHR; static constexpr 
VkObjectType value = VK_OBJECT_TYPE_SURFACE_KHR; }; + template<> struct VkObjectTypeMap { using type = VkSwapchainKHR; static constexpr VkObjectType value = VK_OBJECT_TYPE_SWAPCHAIN_KHR; }; + template<> struct VkObjectTypeMap { using type = VkDebugUtilsMessengerEXT; static constexpr VkObjectType value = VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT; }; +// clang-format on + +#endif + +static inline void SetFormattedObjectName(VkDevice device, void* object_handle, VkObjectType object_type, + const char* format, va_list ap) +{ +#ifdef ENABLE_VULKAN_DEBUG_OBJECTS + if (!vkSetDebugUtilsObjectNameEXT) + { + return; + } + + const std::string str(StringUtil::StdStringFromFormatV(format, ap)); + const VkDebugUtilsObjectNameInfoEXT nameInfo{VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr, object_type, + reinterpret_cast(object_handle), str.c_str()}; + vkSetDebugUtilsObjectNameEXT(device, &nameInfo); +#endif +} + +template +static inline void SetFormattedObjectName(VkDevice device, T object_handle, const char* format, ...) 
+{ +#ifdef ENABLE_VULKAN_DEBUG_OBJECTS + std::va_list ap; + va_start(ap, format); + SetFormattedObjectName(device, reinterpret_cast((typename VkObjectTypeMap::type)object_handle), + VkObjectTypeMap::value, format, ap); + va_end(ap); +#endif +} + +template +static inline void SetObjectName(VkDevice device, T object_handle, const std::string_view& sv) +{ +#ifdef ENABLE_VULKAN_DEBUG_OBJECTS + SetFormattedObjectName(device, object_handle, "%.*s", static_cast(sv.length()), sv.data()); +#endif +} +} // namespace Vulkan \ No newline at end of file diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp new file mode 100644 index 000000000..885d9a2f4 --- /dev/null +++ b/src/util/vulkan_device.cpp @@ -0,0 +1,3181 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "vulkan_device.h" +#include "postprocessing_chain.h" // TODO: Remove me +#include "vulkan_builders.h" +#include "vulkan_pipeline.h" +#include "vulkan_stream_buffer.h" +#include "vulkan_swap_chain.h" +#include "vulkan_texture.h" + +#include "core/host.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/bitutils.h" +#include "common/file_system.h" +#include "common/log.h" +#include "common/path.h" +#include "common/scoped_guard.h" +#include "common/string.h" + +#include "fmt/format.h" + +#include +#include + +Log_SetChannel(VulkanDevice); + +// TODO: VK_KHR_display. 
+ +#pragma pack(push, 4) +struct VK_PIPELINE_CACHE_HEADER +{ + u32 header_length; + u32 header_version; + u32 vendor_id; + u32 device_id; + u8 uuid[VK_UUID_SIZE]; +}; +#pragma pack(pop) + +// Tweakables +enum : u32 +{ + MIN_TEXEL_BUFFER_ELEMENTS = 1024 * 512, + + MAX_DRAW_CALLS_PER_FRAME = 2048, + MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = GPUDevice::MAX_TEXTURE_SAMPLERS * MAX_DRAW_CALLS_PER_FRAME, + MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, + + VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, + INDEX_BUFFER_SIZE = 16 * 1024 * 1024, + VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, + + UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + UNIFORM_PUSH_CONSTANTS_SIZE = 128, + + MAX_UNIFORM_BUFFER_SIZE = 1024, +}; + +const std::array(GPUTexture::Format::MaxCount)> VulkanDevice::TEXTURE_FORMAT_MAPPING = { + VK_FORMAT_UNDEFINED, // Unknown + VK_FORMAT_R8G8B8A8_UNORM, // RGBA8 + VK_FORMAT_B8G8R8A8_UNORM, // BGRA8 + VK_FORMAT_R5G6B5_UNORM_PACK16, // RGB565 + VK_FORMAT_R5G5B5A1_UNORM_PACK16, // RGBA5551 + VK_FORMAT_R8_UNORM, // R8 + VK_FORMAT_D16_UNORM, // D16 + VK_FORMAT_R16_UNORM, // R16 + VK_FORMAT_R16_SFLOAT, // R16F + VK_FORMAT_R32_SINT, // R32I + VK_FORMAT_R32_UINT, // R32U + VK_FORMAT_R32_SFLOAT, // R32F + VK_FORMAT_R8G8_UNORM, // RG8 + VK_FORMAT_R16G16_UNORM, // RG16 + VK_FORMAT_R16G16_SFLOAT, // RG16F + VK_FORMAT_R32G32_SFLOAT, // RG32F + VK_FORMAT_R16G16B16A16_UNORM, // RGBA16 + VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F + VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F + VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2 +}; + +static constexpr VkClearValue s_present_clear_color = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; + +#ifdef _DEBUG +static u32 s_debug_scope_depth = 0; +#endif + +// We need to synchronize instance creation because of adapter enumeration from the UI thread. 
+static std::mutex s_instance_mutex; + +VulkanDevice::VulkanDevice() +{ +#ifdef _DEBUG + s_debug_scope_depth = 0; +#endif +} + +VulkanDevice::~VulkanDevice() +{ + Assert(m_device == VK_NULL_HANDLE); +} + +GPUTexture::Format VulkanDevice::GetFormatForVkFormat(VkFormat format) +{ + for (u32 i = 0; i < static_cast(std::size(TEXTURE_FORMAT_MAPPING)); i++) + { + if (TEXTURE_FORMAT_MAPPING[i] == format) + return static_cast(i); + } + + return GPUTexture::Format::Unknown; +} + +VkInstance VulkanDevice::CreateVulkanInstance(const WindowInfo& wi, bool enable_debug_utils, + bool enable_validation_layer) +{ + ExtensionList enabled_extensions; + if (!SelectInstanceExtensions(&enabled_extensions, wi, enable_debug_utils)) + return VK_NULL_HANDLE; + + // Remember to manually update this every release. We don't pull in svnrev.h here, because + // it's only the major/minor version, and rebuilding the file every time something else changes + // is unnecessary. + VkApplicationInfo app_info = {}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pNext = nullptr; + app_info.pApplicationName = "DuckStation"; + app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); + app_info.pEngineName = "DuckStation"; + app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); + app_info.apiVersion = VK_API_VERSION_1_1; + + VkInstanceCreateInfo instance_create_info = {}; + instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_create_info.pNext = nullptr; + instance_create_info.flags = 0; + instance_create_info.pApplicationInfo = &app_info; + instance_create_info.enabledExtensionCount = static_cast(enabled_extensions.size()); + instance_create_info.ppEnabledExtensionNames = enabled_extensions.data(); + instance_create_info.enabledLayerCount = 0; + instance_create_info.ppEnabledLayerNames = nullptr; + + // Enable debug layer on debug builds + if (enable_validation_layer) + { + static const char* layer_names[] = {"VK_LAYER_KHRONOS_validation"}; + 
instance_create_info.enabledLayerCount = 1; + instance_create_info.ppEnabledLayerNames = layer_names; + } + + VkInstance instance; + VkResult res = vkCreateInstance(&instance_create_info, nullptr, &instance); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateInstance failed: "); + return nullptr; + } + + return instance; +} + +bool VulkanDevice::SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, + bool enable_debug_utils) +{ + u32 extension_count = 0; + VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); + return false; + } + + if (extension_count == 0) + { + Log_ErrorPrintf("Vulkan: No extensions supported by instance."); + return false; + } + + std::vector available_extension_list(extension_count); + res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, available_extension_list.data()); + DebugAssert(res == VK_SUCCESS); + + auto SupportsExtension = [&](const char* name, bool required) { + if (std::find_if(available_extension_list.begin(), available_extension_list.end(), + [&](const VkExtensionProperties& properties) { + return !strcmp(name, properties.extensionName); + }) != available_extension_list.end()) + { + Log_DevPrintf("Enabling extension: %s", name); + extension_list->push_back(name); + return true; + } + + if (required) + Log_ErrorPrintf("Vulkan: Missing required extension %s.", name); + + return false; + }; + + // Common extensions + if (wi.type != WindowInfo::Type::Surfaceless && !SupportsExtension(VK_KHR_SURFACE_EXTENSION_NAME, true)) + return false; + +#if defined(VK_USE_PLATFORM_WIN32_KHR) + if (wi.type == WindowInfo::Type::Win32 && !SupportsExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true)) + return false; +#endif +#if defined(VK_USE_PLATFORM_XLIB_KHR) + if (wi.type == WindowInfo::Type::X11 && !SupportsExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, 
true)) + return false; +#endif +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) + if (wi.type == WindowInfo::Type::Wayland && !SupportsExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true)) + return false; +#endif +#if defined(VK_USE_PLATFORM_METAL_EXT) + if (wi.type == WindowInfo::Type::MacOS && !SupportsExtension(VK_EXT_METAL_SURFACE_EXTENSION_NAME, true)) + return false; +#endif + + // VK_EXT_debug_utils + if (enable_debug_utils && !SupportsExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false)) + Log_WarningPrintf("Vulkan: Debug report requested, but extension is not available."); + + // Needed for exclusive fullscreen control. + SupportsExtension(VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME, false); + + return true; +} + +VulkanDevice::GPUList VulkanDevice::EnumerateGPUs(VkInstance instance) +{ + GPUList gpus; + + u32 gpu_count = 0; + VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr); + if ((res != VK_SUCCESS && res != VK_INCOMPLETE) || gpu_count == 0) + { + LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices (1) failed: "); + return gpus; + } + + std::vector physical_devices(gpu_count); + res = vkEnumeratePhysicalDevices(instance, &gpu_count, physical_devices.data()); + if (res == VK_INCOMPLETE) + { + Log_WarningPrintf("First vkEnumeratePhysicalDevices() call returned %zu devices, but second returned %u", + physical_devices.size(), gpu_count); + } + else if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices (2) failed: "); + return gpus; + } + + // Maybe we lost a GPU? 
+ if (gpu_count < physical_devices.size()) + physical_devices.resize(gpu_count); + + gpus.reserve(physical_devices.size()); + for (VkPhysicalDevice device : physical_devices) + { + VkPhysicalDeviceProperties props = {}; + vkGetPhysicalDeviceProperties(device, &props); + + std::string gpu_name = props.deviceName; + + // handle duplicate adapter names + if (std::any_of(gpus.begin(), gpus.end(), [&gpu_name](const auto& other) { return (gpu_name == other.second); })) + { + std::string original_adapter_name = std::move(gpu_name); + + u32 current_extra = 2; + do + { + gpu_name = fmt::format("{} ({})", original_adapter_name, current_extra); + current_extra++; + } while ( + std::any_of(gpus.begin(), gpus.end(), [&gpu_name](const auto& other) { return (gpu_name == other.second); })); + } + + gpus.emplace_back(device, std::move(gpu_name)); + } + + return gpus; +} + +bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface) +{ + u32 extension_count = 0; + VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: "); + return false; + } + + if (extension_count == 0) + { + Log_ErrorPrintf("Vulkan: No extensions supported by device."); + return false; + } + + std::vector available_extension_list(extension_count); + res = + vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, available_extension_list.data()); + DebugAssert(res == VK_SUCCESS); + + auto SupportsExtension = [&](const char* name, bool required) { + if (std::find_if(available_extension_list.begin(), available_extension_list.end(), + [&](const VkExtensionProperties& properties) { + return !strcmp(name, properties.extensionName); + }) != available_extension_list.end()) + { + if (std::none_of(extension_list->begin(), extension_list->end(), + [&](const char* existing_name) { return (std::strcmp(existing_name, 
name) == 0); })) + { + Log_DevPrintf("Enabling extension: %s", name); + extension_list->push_back(name); + } + + return true; + } + + if (required) + Log_ErrorPrintf("Vulkan: Missing required extension %s.", name); + + return false; + }; + + if (enable_surface && !SupportsExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true)) + return false; + + m_optional_extensions.vk_ext_memory_budget = SupportsExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false); + m_optional_extensions.vk_ext_rasterization_order_attachment_access = + SupportsExtension(VK_EXT_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false) || + SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false); + m_optional_extensions.vk_ext_attachment_feedback_loop_layout = + SupportsExtension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, false); + m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false); + m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false); + +#ifdef _WIN32 + m_optional_extensions.vk_ext_full_screen_exclusive = + enable_surface && SupportsExtension(VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, false); + Log_InfoPrintf("VK_EXT_full_screen_exclusive is %s", + m_optional_extensions.vk_ext_full_screen_exclusive ? "supported" : "NOT supported"); +#endif + + return true; +} + +bool VulkanDevice::SelectDeviceFeatures() +{ + VkPhysicalDeviceFeatures available_features; + vkGetPhysicalDeviceFeatures(m_physical_device, &available_features); + + // Enable the features we use. 
+ m_device_features.dualSrcBlend = available_features.dualSrcBlend; + m_device_features.largePoints = available_features.largePoints; + m_device_features.wideLines = available_features.wideLines; + m_device_features.samplerAnisotropy = available_features.samplerAnisotropy; + + return true; +} + +bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer) +{ + u32 queue_family_count; + vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr); + if (queue_family_count == 0) + { + Log_ErrorPrintf("No queue families found on specified vulkan physical device."); + return false; + } + + std::vector queue_family_properties(queue_family_count); + vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, queue_family_properties.data()); + Log_DevPrintf("%u vulkan queue families", queue_family_count); + + // Find graphics and present queues. + m_graphics_queue_family_index = queue_family_count; + m_present_queue_family_index = queue_family_count; + for (uint32_t i = 0; i < queue_family_count; i++) + { + VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT; + if (graphics_supported) + { + m_graphics_queue_family_index = i; + // Quit now, no need for a present queue. + if (!surface) + { + break; + } + } + + if (surface) + { + VkBool32 present_supported; + VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); + return false; + } + + if (present_supported) + { + m_present_queue_family_index = i; + } + + // Prefer one queue family index that does both graphics and present. 
+ if (graphics_supported && present_supported) + { + break; + } + } + } + if (m_graphics_queue_family_index == queue_family_count) + { + Log_ErrorPrintf("Vulkan: Failed to find an acceptable graphics queue."); + return false; + } + if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count) + { + Log_ErrorPrintf("Vulkan: Failed to find an acceptable present queue."); + return false; + } + + VkDeviceCreateInfo device_info = {}; + device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_info.pNext = nullptr; + device_info.flags = 0; + device_info.queueCreateInfoCount = 0; + + static constexpr float queue_priorities[] = {1.0f}; + std::array queue_infos; + VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++]; + graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + graphics_queue_info.pNext = nullptr; + graphics_queue_info.flags = 0; + graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index; + graphics_queue_info.queueCount = 1; + graphics_queue_info.pQueuePriorities = queue_priorities; + + if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index) + { + VkDeviceQueueCreateInfo& present_queue_info = queue_infos[device_info.queueCreateInfoCount++]; + present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + present_queue_info.pNext = nullptr; + present_queue_info.flags = 0; + present_queue_info.queueFamilyIndex = m_present_queue_family_index; + present_queue_info.queueCount = 1; + present_queue_info.pQueuePriorities = queue_priorities; + } + + device_info.pQueueCreateInfos = queue_infos.data(); + + ExtensionList enabled_extensions; + if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE)) + return false; + + device_info.enabledExtensionCount = static_cast(enabled_extensions.size()); + device_info.ppEnabledExtensionNames = enabled_extensions.data(); + + // Check for required features before 
creating. + if (!SelectDeviceFeatures()) + return false; + + device_info.pEnabledFeatures = &m_device_features; + + // Enable debug layer on debug builds + if (enable_validation_layer) + { + static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"}; + device_info.enabledLayerCount = 1; + device_info.ppEnabledLayerNames = layer_names; + } + + VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT}; + VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT}; + + if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) + { + rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess = VK_TRUE; + Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature); + } + if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout) + { + attachment_feedback_loop_feature.attachmentFeedbackLoopLayout = VK_TRUE; + Vulkan::AddPointerToChain(&device_info, &attachment_feedback_loop_feature); + } + + VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDevice failed: "); + return false; + } + + // With the device created, we can fill the remaining entry points. + if (!Vulkan::LoadVulkanDeviceFunctions(m_device)) + return false; + + // Grab the graphics and present queues. 
+ vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue); + if (surface) + vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue); + + m_features.gpu_timing = (m_device_properties.limits.timestampComputeAndGraphics != 0 && + queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 && + m_device_properties.limits.timestampPeriod > 0); + Log_DevPrintf("GPU timing is %s (TS=%u TS valid bits=%u, TS period=%f)", + m_features.gpu_timing ? "supported" : "not supported", + static_cast(m_device_properties.limits.timestampComputeAndGraphics), + queue_family_properties[m_graphics_queue_family_index].timestampValidBits, + m_device_properties.limits.timestampPeriod); + + ProcessDeviceExtensions(); + return true; +} + +void VulkanDevice::ProcessDeviceExtensions() +{ + // advanced feature checks + VkPhysicalDeviceFeatures2 features2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; + VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT}; + VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT}; + + // add in optional feature structs + if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) + Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature); + if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout) + Vulkan::AddPointerToChain(&features2, &attachment_feedback_loop_feature); + + // query + vkGetPhysicalDeviceFeatures2(m_physical_device, &features2); + + // confirm we actually support it + m_optional_extensions.vk_ext_rasterization_order_attachment_access &= + (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE); + m_optional_extensions.vk_ext_attachment_feedback_loop_layout &= + 
(attachment_feedback_loop_feature.attachmentFeedbackLoopLayout == VK_TRUE); + + VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR}; + + if (m_optional_extensions.vk_khr_driver_properties) + { + m_device_driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + Vulkan::AddPointerToChain(&properties2, &m_device_driver_properties); + } + if (m_optional_extensions.vk_khr_push_descriptor) + Vulkan::AddPointerToChain(&properties2, &push_descriptor_properties); + + // query + vkGetPhysicalDeviceProperties2(m_physical_device, &properties2); + + m_optional_extensions.vk_khr_push_descriptor &= (push_descriptor_properties.maxPushDescriptors >= 1); + + Log_InfoPrintf("VK_EXT_memory_budget is %s", + m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported"); + Log_InfoPrintf("VK_EXT_rasterization_order_attachment_access is %s", + m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported"); + Log_InfoPrintf("VK_EXT_attachment_feedback_loop_layout is %s", + m_optional_extensions.vk_ext_attachment_feedback_loop_layout ? "supported" : "NOT supported"); + Log_InfoPrintf("VK_KHR_push_descriptor is %s", + m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported"); +} + +bool VulkanDevice::CreateAllocator() +{ + VmaAllocatorCreateInfo ci = {}; + ci.vulkanApiVersion = VK_API_VERSION_1_1; + ci.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; + ci.physicalDevice = m_physical_device; + ci.device = m_device; + ci.instance = m_instance; + + if (m_optional_extensions.vk_ext_memory_budget) + ci.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; + + // Limit usage of the DEVICE_LOCAL upload heap when we're using a debug device. 
+ // On NVIDIA drivers, it results in frequently running out of device memory when trying to + // play back captures in RenderDoc, making life very painful. Re-BAR GPUs should be fine. + constexpr VkDeviceSize UPLOAD_HEAP_SIZE_THRESHOLD = 512 * 1024 * 1024; + constexpr VkMemoryPropertyFlags UPLOAD_HEAP_PROPERTIES = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + std::array heap_size_limits; + if (m_debug_device) + { + VkPhysicalDeviceMemoryProperties memory_properties; + vkGetPhysicalDeviceMemoryProperties(m_physical_device, &memory_properties); + + bool has_upload_heap = false; + heap_size_limits.fill(VK_WHOLE_SIZE); + for (u32 i = 0; i < memory_properties.memoryTypeCount; i++) + { + // Look for any memory types which are upload-like. + const VkMemoryType& type = memory_properties.memoryTypes[i]; + if ((type.propertyFlags & UPLOAD_HEAP_PROPERTIES) != UPLOAD_HEAP_PROPERTIES) + continue; + + const VkMemoryHeap& heap = memory_properties.memoryHeaps[type.heapIndex]; + if (heap.size >= UPLOAD_HEAP_SIZE_THRESHOLD) + continue; + + if (heap_size_limits[type.heapIndex] == VK_WHOLE_SIZE) + { + Log_WarningPrintf("Disabling allocation from upload heap #%u (%.2f MB) due to debug device.", type.heapIndex, + static_cast(heap.size) / 1048576.0f); + heap_size_limits[type.heapIndex] = 0; + has_upload_heap = true; + } + } + + if (has_upload_heap) + ci.pHeapSizeLimit = heap_size_limits.data(); + } + + VkResult res = vmaCreateAllocator(&ci, &m_allocator); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vmaCreateAllocator failed: "); + return false; + } + + return true; +} + +void VulkanDevice::DestroyAllocator() +{ + if (m_allocator == VK_NULL_HANDLE) + return; + + vmaDestroyAllocator(m_allocator); + m_allocator = VK_NULL_HANDLE; +} + +bool VulkanDevice::CreateCommandBuffers() +{ + VkResult res; + + uint32_t frame_index = 0; + for (CommandBuffer& resources : m_frame_resources) + { + resources.needs_fence_wait = false; + + VkCommandPoolCreateInfo 
pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, + m_graphics_queue_family_index}; + res = vkCreateCommandPool(m_device, &pool_info, nullptr, &resources.command_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: "); + return false; + } + Vulkan::SetObjectName(m_device, resources.command_pool, TinyString::FromFmt("Frame Command Pool {}", frame_index)); + + VkCommandBufferAllocateInfo buffer_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr, + resources.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, + static_cast(resources.command_buffers.size())}; + + res = vkAllocateCommandBuffers(m_device, &buffer_info, resources.command_buffers.data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: "); + return false; + } + for (u32 i = 0; i < resources.command_buffers.size(); i++) + { + Vulkan::SetObjectName(m_device, resources.command_buffers[i], + TinyString::FromFmt("Frame {} {}Command Buffer", frame_index, (i == 0) ? 
"Init" : "")); + } + + VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT}; + + res = vkCreateFence(m_device, &fence_info, nullptr, &resources.fence); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateFence failed: "); + return false; + } + Vulkan::SetObjectName(m_device, resources.fence, TinyString::FromFmt("Frame Fence {}", frame_index)); + + if (!m_optional_extensions.vk_khr_push_descriptor) + { + VkDescriptorPoolSize pool_sizes[] = { + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME}, + }; + + VkDescriptorPoolCreateInfo pool_create_info = { + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME, + static_cast(std::size(pool_sizes)), pool_sizes}; + + res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); + return false; + } + Vulkan::SetObjectName(m_device, resources.descriptor_pool, + TinyString::FromFmt("Frame Descriptor Pool {}", frame_index)); + } + + ++frame_index; + } + + BeginCommandBuffer(0); + return true; +} + +void VulkanDevice::DestroyCommandBuffers() +{ + for (CommandBuffer& resources : m_frame_resources) + { + if (resources.fence != VK_NULL_HANDLE) + vkDestroyFence(m_device, resources.fence, nullptr); + if (resources.descriptor_pool != VK_NULL_HANDLE) + vkDestroyDescriptorPool(m_device, resources.descriptor_pool, nullptr); + if (resources.command_buffers[0] != VK_NULL_HANDLE) + { + vkFreeCommandBuffers(m_device, resources.command_pool, static_cast(resources.command_buffers.size()), + resources.command_buffers.data()); + } + if (resources.command_pool != VK_NULL_HANDLE) + vkDestroyCommandPool(m_device, resources.command_pool, nullptr); + } +} + +bool VulkanDevice::CreatePersistentDescriptorPool() +{ + static constexpr const VkDescriptorPoolSize pool_sizes[] = { + 
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}, + }; + + const VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 16, + static_cast(std::size(pool_sizes)), pool_sizes}; + + VkResult res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &m_global_descriptor_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); + return false; + } + Vulkan::SetObjectName(m_device, m_global_descriptor_pool, "Global Descriptor Pool"); + + if (m_features.gpu_timing) + { + const VkQueryPoolCreateInfo query_create_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 4, 0}; + res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: "); + m_features.gpu_timing = false; + return false; + } + } + + return true; +} + +void VulkanDevice::DestroyPersistentDescriptorPool() +{ + if (m_timestamp_query_pool != VK_NULL_HANDLE) + vkDestroyQueryPool(m_device, m_timestamp_query_pool, nullptr); + + if (m_global_descriptor_pool != VK_NULL_HANDLE) + vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr); +} + +VkRenderPass VulkanDevice::GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples, + VkAttachmentLoadOp color_load_op /* = VK_ATTACHMENT_LOAD_OP_LOAD */, + VkAttachmentStoreOp color_store_op /* = VK_ATTACHMENT_STORE_OP_STORE */, + VkAttachmentLoadOp depth_load_op /* = VK_ATTACHMENT_LOAD_OP_LOAD */, + VkAttachmentStoreOp depth_store_op /* = VK_ATTACHMENT_STORE_OP_STORE */, + VkAttachmentLoadOp stencil_load_op /* = VK_ATTACHMENT_LOAD_OP_DONT_CARE */, + VkAttachmentStoreOp stencil_store_op /* = VK_ATTACHMENT_STORE_OP_DONT_CARE */, + bool 
color_feedback_loop /* = false */, bool depth_sampling /* = false */) +{ + RenderPassCacheKey key = {}; + key.color_format = color_format; + key.depth_format = depth_format; + key.samples = samples; + key.color_load_op = color_load_op; + key.color_store_op = color_store_op; + key.depth_load_op = depth_load_op; + key.depth_store_op = depth_store_op; + key.stencil_load_op = stencil_load_op; + key.stencil_store_op = stencil_store_op; + key.color_feedback_loop = color_feedback_loop; + key.depth_sampling = depth_sampling; + + auto it = m_render_pass_cache.find(key.key); + if (it != m_render_pass_cache.end()) + return it->second; + + return CreateCachedRenderPass(key); +} + +VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass) +{ + for (const auto& it : m_render_pass_cache) + { + if (it.second != pass) + continue; + + RenderPassCacheKey modified_key; + modified_key.key = it.first; + if (modified_key.color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + modified_key.color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + if (modified_key.depth_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + modified_key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + if (modified_key.stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + modified_key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + + if (modified_key.key == it.first) + return pass; + + auto fit = m_render_pass_cache.find(modified_key.key); + if (fit != m_render_pass_cache.end()) + return fit->second; + + return CreateCachedRenderPass(modified_key); + } + + return pass; +} + +VkCommandBuffer VulkanDevice::GetCurrentInitCommandBuffer() +{ + CommandBuffer& res = m_frame_resources[m_current_frame]; + VkCommandBuffer buf = res.command_buffers[0]; + if (res.init_buffer_used) + return buf; + + VkCommandBufferBeginInfo bi{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr}; + vkBeginCommandBuffer(buf, &bi); + res.init_buffer_used = true; + return buf; +} + +VkDescriptorSet 
VulkanDevice::AllocateDescriptorSet(VkDescriptorSetLayout set_layout) +{ + VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, + m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout}; + + VkDescriptorSet descriptor_set; + VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set); + if (res != VK_SUCCESS) + { + // Failing to allocate a descriptor set is not a fatal error, we can + // recover by moving to the next command buffer. + return VK_NULL_HANDLE; + } + + return descriptor_set; +} + +VkDescriptorSet VulkanDevice::AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout) +{ + VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, + m_global_descriptor_pool, 1, &set_layout}; + + VkDescriptorSet descriptor_set; + VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set); + if (res != VK_SUCCESS) + return VK_NULL_HANDLE; + + return descriptor_set; +} + +void VulkanDevice::FreePersistentDescriptorSet(VkDescriptorSet set) +{ + vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &set); +} + +void VulkanDevice::WaitForFenceCounter(u64 fence_counter) +{ + if (m_completed_fence_counter >= fence_counter) + return; + + // Find the first command buffer which covers this counter value. 
+ u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; + while (index != m_current_frame) + { + if (m_frame_resources[index].fence_counter >= fence_counter) + break; + + index = (index + 1) % NUM_COMMAND_BUFFERS; + } + + DebugAssert(index != m_current_frame); + WaitForCommandBufferCompletion(index); +} + +void VulkanDevice::WaitForGPUIdle() +{ + WaitForPresentComplete(); + vkDeviceWaitIdle(m_device); +} + +float VulkanDevice::GetAndResetAccumulatedGPUTime() +{ + const float time = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return time; +} + +bool VulkanDevice::SetGPUTimingEnabled(bool enabled) +{ + m_gpu_timing_enabled = enabled && m_features.gpu_timing; + return (enabled == m_gpu_timing_enabled); +} + +void VulkanDevice::WaitForCommandBufferCompletion(u32 index) +{ + // Wait for this command buffer to be completed. + VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); + m_last_submit_failed.store(true, std::memory_order_release); + return; + } + + // Clean up any resources for command buffers between the last known completed buffer and this + // now-completed command buffer. If we use >2 buffers, this may be more than one buffer. 
+ const u64 now_completed_counter = m_frame_resources[index].fence_counter; + u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; + while (cleanup_index != m_current_frame) + { + CommandBuffer& resources = m_frame_resources[cleanup_index]; + if (resources.fence_counter > now_completed_counter) + break; + + if (m_gpu_timing_enabled && resources.timestamp_written) + { + std::array timestamps; + res = + vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast(timestamps.size()), + sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT); + if (res == VK_SUCCESS) + { + // if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be + // zero + if (timestamps[0] > 0 && m_gpu_timing_enabled) + { + const double ns_diff = + (timestamps[1] - timestamps[0]) * static_cast(m_device_properties.limits.timestampPeriod); + m_accumulated_gpu_time += static_cast(ns_diff / 1000000.0); + } + } + else + { + LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: "); + } + } + + cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS; + } + + m_completed_fence_counter = now_completed_counter; + while (!m_cleanup_objects.empty()) + { + auto& it = m_cleanup_objects.front(); + if (it.first > now_completed_counter) + break; + it.second(); + m_cleanup_objects.pop_front(); + } +} + +void VulkanDevice::SubmitCommandBuffer(VulkanSwapChain* present_swap_chain /* = nullptr */, + bool submit_on_thread /* = false */) +{ + if (m_last_submit_failed.load(std::memory_order_acquire)) + return; + + CommandBuffer& resources = m_frame_resources[m_current_frame]; + + // End the current command buffer. 
+ VkResult res; + if (resources.init_buffer_used) + { + res = vkEndCommandBuffer(resources.command_buffers[0]); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: "); + Panic("Failed to end command buffer"); + } + } + + if (m_gpu_timing_enabled && resources.timestamp_written) + { + vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, + m_current_frame * 2 + 1); + } + + res = vkEndCommandBuffer(resources.command_buffers[1]); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: "); + Panic("Failed to end command buffer"); + } + + // This command buffer now has commands, so can't be re-used without waiting. + resources.needs_fence_wait = true; + + std::unique_lock lock(m_present_mutex); + WaitForPresentComplete(lock); + + if (!submit_on_thread || !m_present_thread.joinable()) + { + DoSubmitCommandBuffer(m_current_frame, present_swap_chain); + if (present_swap_chain) + DoPresent(present_swap_chain); + return; + } + + m_queued_present.command_buffer_index = m_current_frame; + m_queued_present.swap_chain = present_swap_chain; + m_present_done.store(false); + m_present_queued_cv.notify_one(); +} + +void VulkanDevice::DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swap_chain) +{ + CommandBuffer& resources = m_frame_resources[index]; + + uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + submit_info.commandBufferCount = resources.init_buffer_used ? 2u : 1u; + submit_info.pCommandBuffers = + resources.init_buffer_used ? 
resources.command_buffers.data() : &resources.command_buffers[1]; + + if (present_swap_chain) + { + submit_info.pWaitSemaphores = present_swap_chain->GetImageAvailableSemaphorePtr(); + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitDstStageMask = &wait_bits; + + submit_info.pSignalSemaphores = present_swap_chain->GetRenderingFinishedSemaphorePtr(); + submit_info.signalSemaphoreCount = 1; + } + + const VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: "); + m_last_submit_failed.store(true, std::memory_order_release); + return; + } +} + +void VulkanDevice::DoPresent(VulkanSwapChain* present_swap_chain) +{ + const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + nullptr, + 1, + present_swap_chain->GetRenderingFinishedSemaphorePtr(), + 1, + present_swap_chain->GetSwapChainPtr(), + present_swap_chain->GetCurrentImageIndexPtr(), + nullptr}; + + present_swap_chain->ReleaseCurrentImage(); + + const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info); + if (res != VK_SUCCESS) + { + // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain. + if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) + LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); + + m_last_present_failed.store(true, std::memory_order_release); + return; + } + + // Grab the next image as soon as possible, that way we spend less time blocked on the next + // submission. Don't care if it fails, we'll deal with that at the presentation call site. + // Credit to dxvk for the idea. 
+ present_swap_chain->AcquireNextImage(); +} + +void VulkanDevice::WaitForPresentComplete() +{ + if (m_present_done.load()) + return; + + std::unique_lock lock(m_present_mutex); + WaitForPresentComplete(lock); +} + +void VulkanDevice::WaitForPresentComplete(std::unique_lock& lock) +{ + if (m_present_done.load()) + return; + + m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); }); +} + +void VulkanDevice::PresentThread() +{ + std::unique_lock lock(m_present_mutex); + while (!m_present_thread_done.load()) + { + m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); }); + + if (m_present_done.load()) + continue; + + DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.swap_chain); + if (m_queued_present.swap_chain) + DoPresent(m_queued_present.swap_chain); + m_present_done.store(true); + m_present_done_cv.notify_one(); + } +} + +void VulkanDevice::StartPresentThread() +{ + DebugAssert(!m_present_thread.joinable()); + m_present_thread_done.store(false); + m_present_thread = std::thread(&VulkanDevice::PresentThread, this); +} + +void VulkanDevice::StopPresentThread() +{ + if (!m_present_thread.joinable()) + return; + + { + std::unique_lock lock(m_present_mutex); + WaitForPresentComplete(lock); + m_present_thread_done.store(true); + m_present_queued_cv.notify_one(); + } + + m_present_thread.join(); +} + +void VulkanDevice::MoveToNextCommandBuffer() +{ + BeginCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS); +} + +void VulkanDevice::BeginCommandBuffer(u32 index) +{ + CommandBuffer& resources = m_frame_resources[index]; + + if (!m_present_done.load() && m_queued_present.command_buffer_index == index) + WaitForPresentComplete(); + + // Wait for the GPU to finish with all resources for this command buffer. + if (resources.fence_counter > m_completed_fence_counter) + WaitForCommandBufferCompletion(index); + + // Reset fence to unsignaled before starting. 
+ VkResult res = vkResetFences(m_device, 1, &resources.fence); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkResetFences failed: "); + + // Reset command pools to beginning since we can re-use the memory now + res = vkResetCommandPool(m_device, resources.command_pool, 0); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: "); + + // Enable commands to be recorded to the two buffers again. + VkCommandBufferBeginInfo begin_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr}; + res = vkBeginCommandBuffer(resources.command_buffers[1], &begin_info); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: "); + + // Also can do the same for the descriptor pools + if (resources.descriptor_pool != VK_NULL_HANDLE) + { + res = vkResetDescriptorPool(m_device, resources.descriptor_pool, 0); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: "); + } + + if (m_gpu_timing_enabled) + { + vkCmdResetQueryPool(resources.command_buffers[1], m_timestamp_query_pool, index * 2, 2); + vkCmdWriteTimestamp(resources.command_buffers[1], VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, + index * 2); + } + + resources.fence_counter = m_next_fence_counter++; + resources.init_buffer_used = false; + resources.timestamp_written = m_gpu_timing_enabled; + + m_current_frame = index; + m_current_command_buffer = resources.command_buffers[1]; + + // using the lower 32 bits of the fence index should be sufficient here, I hope... 
+ vmaSetCurrentFrameIndex(m_allocator, static_cast(m_next_fence_counter)); +} + +void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion) +{ + DebugAssert(!InRenderPass()); + + const u32 current_frame = m_current_frame; + SubmitCommandBuffer(); + MoveToNextCommandBuffer(); + + if (wait_for_completion) + WaitForCommandBufferCompletion(current_frame); + + InvalidateCachedState(); +} + +void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion, const char* reason, ...) +{ + std::va_list ap; + va_start(ap, reason); + const std::string reason_str(StringUtil::StdStringFromFormatV(reason, ap)); + va_end(ap); + + Log_WarningPrintf("Executing command buffer due to '%s'", reason_str.c_str()); + SubmitCommandBuffer(wait_for_completion); +} + +void VulkanDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason) +{ + if (InRenderPass()) + EndRenderPass(); + + VulkanFramebuffer* fb = m_current_framebuffer; + VulkanPipeline* pl = m_current_pipeline; + SubmitCommandBuffer(false, "%s", reason); + + if (fb) + SetFramebuffer(fb); + SetPipeline(pl); + BeginRenderPass(); +} + +bool VulkanDevice::CheckLastPresentFail() +{ + return m_last_present_failed.exchange(false, std::memory_order_acq_rel); +} + +bool VulkanDevice::CheckLastSubmitFail() +{ + return m_last_submit_failed.load(std::memory_order_acquire); +} + +void VulkanDevice::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), + [this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); }); +} + +void VulkanDevice::DeferFramebufferDestruction(VkFramebuffer object) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), + [this, object]() { vkDestroyFramebuffer(m_device, object, nullptr); }); +} + +void VulkanDevice::DeferImageDestruction(VkImage object, VmaAllocation allocation) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), + [this, object, allocation]() { vmaDestroyImage(m_allocator, 
object, allocation); }); +} + +void VulkanDevice::DeferImageViewDestruction(VkImageView object) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), + [this, object]() { vkDestroyImageView(m_device, object, nullptr); }); +} + +void VulkanDevice::DeferPipelineDestruction(VkPipeline object) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), + [this, object]() { vkDestroyPipeline(m_device, object, nullptr); }); +} + +void VulkanDevice::DeferBufferViewDestruction(VkBufferView object) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), + [this, object]() { vkDestroyBufferView(m_device, object, nullptr); }); +} + +void VulkanDevice::DeferPersistentDescriptorSetDestruction(VkDescriptorSet object) +{ + m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), [this, object]() { FreePersistentDescriptorSet(object); }); +} + +VKAPI_ATTR VkBool32 VKAPI_CALL DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, + void* pUserData) +{ + if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) + { + Log_ErrorPrintf("Vulkan debug report: (%s) %s", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", + pCallbackData->pMessage); + } + else if (severity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) + { + Log_WarningPrintf("Vulkan debug report: (%s) %s", + pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage); + } + else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) + { + Log_InfoPrintf("Vulkan debug report: (%s) %s", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", + pCallbackData->pMessage); + } + else + { + Log_DevPrintf("Vulkan debug report: (%s) %s", pCallbackData->pMessageIdName ? 
pCallbackData->pMessageIdName : "", + pCallbackData->pMessage); + } + + return VK_FALSE; +} + +bool VulkanDevice::EnableDebugUtils() +{ + // Already enabled? + if (m_debug_messenger_callback != VK_NULL_HANDLE) + return true; + + // Check for presence of the functions before calling + if (!vkCreateDebugUtilsMessengerEXT || !vkDestroyDebugUtilsMessengerEXT || !vkSubmitDebugUtilsMessageEXT) + { + return false; + } + + VkDebugUtilsMessengerCreateInfoEXT messenger_info = { + VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + nullptr, + 0, + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT, + VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, + DebugMessengerCallback, + nullptr}; + + const VkResult res = + vkCreateDebugUtilsMessengerEXT(m_instance, &messenger_info, nullptr, &m_debug_messenger_callback); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDebugUtilsMessengerEXT failed: "); + return false; + } + + return true; +} + +void VulkanDevice::DisableDebugUtils() +{ + if (m_debug_messenger_callback != VK_NULL_HANDLE) + { + vkDestroyDebugUtilsMessengerEXT(m_instance, m_debug_messenger_callback, nullptr); + m_debug_messenger_callback = VK_NULL_HANDLE; + } +} + +VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) +{ + VkAttachmentReference color_reference; + VkAttachmentReference* color_reference_ptr = nullptr; + VkAttachmentReference depth_reference; + VkAttachmentReference* depth_reference_ptr = nullptr; + VkAttachmentReference input_reference; + VkAttachmentReference* input_reference_ptr = nullptr; + VkSubpassDependency subpass_dependency; + VkSubpassDependency* subpass_dependency_ptr = nullptr; + std::array attachments; + u32 num_attachments = 0; + if (key.color_format != VK_FORMAT_UNDEFINED) + { + const VkImageLayout layout = + 
key.color_feedback_loop ? + (UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) : + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachments[num_attachments] = {0, + static_cast(key.color_format), + static_cast(key.samples), + static_cast(key.color_load_op), + static_cast(key.color_store_op), + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + layout, + layout}; + color_reference.attachment = num_attachments; + color_reference.layout = layout; + color_reference_ptr = &color_reference; + + if (key.color_feedback_loop) + { + if (!UseFeedbackLoopLayout()) + { + input_reference.attachment = num_attachments; + input_reference.layout = layout; + input_reference_ptr = &input_reference; + } + + if (!m_optional_extensions.vk_ext_rasterization_order_attachment_access) + { + // don't need the framebuffer-local dependency when we have rasterization order attachment access + subpass_dependency.srcSubpass = 0; + subpass_dependency.dstSubpass = 0; + subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + subpass_dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + subpass_dependency.dstAccessMask = + UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + subpass_dependency.dependencyFlags = UseFeedbackLoopLayout() ? + (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) : + VK_DEPENDENCY_BY_REGION_BIT; + subpass_dependency_ptr = &subpass_dependency; + } + } + + num_attachments++; + } + if (key.depth_format != VK_FORMAT_UNDEFINED) + { + const VkImageLayout layout = + key.depth_sampling ? + (UseFeedbackLoopLayout() ? 
VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachments[num_attachments] = {0, + static_cast(key.depth_format), + static_cast(key.samples), + static_cast(key.depth_load_op), + static_cast(key.depth_store_op), + static_cast(key.stencil_load_op), + static_cast(key.stencil_store_op), + layout, + layout}; + depth_reference.attachment = num_attachments; + depth_reference.layout = layout; + depth_reference_ptr = &depth_reference; + num_attachments++; + } + + const VkSubpassDescriptionFlags subpass_flags = + (key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ? + VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT : + 0; + const VkSubpassDescription subpass = {subpass_flags, + VK_PIPELINE_BIND_POINT_GRAPHICS, + input_reference_ptr ? 1u : 0u, + input_reference_ptr ? input_reference_ptr : nullptr, + color_reference_ptr ? 1u : 0u, + color_reference_ptr ? color_reference_ptr : nullptr, + nullptr, + depth_reference_ptr, + 0, + nullptr}; + const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + nullptr, + 0u, + num_attachments, + attachments.data(), + 1u, + &subpass, + subpass_dependency_ptr ? 
1u : 0u, + subpass_dependency_ptr}; + + VkRenderPass pass; + const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: "); + return VK_NULL_HANDLE; + } + + m_render_pass_cache.emplace(key.key, pass); + return pass; +} + +void VulkanDevice::GetAdapterAndModeList(AdapterAndModeList* ret, VkInstance instance) +{ + GPUList gpus = EnumerateGPUs(instance); + ret->adapter_names.clear(); + for (auto& [gpu, name] : gpus) + ret->adapter_names.push_back(std::move(name)); +} + +GPUDevice::AdapterAndModeList VulkanDevice::StaticGetAdapterAndModeList() +{ + AdapterAndModeList ret; + std::unique_lock lock(s_instance_mutex); + + // Device shouldn't be torn down since we have the lock. + if (g_gpu_device && g_gpu_device->GetRenderAPI() == RenderAPI::Vulkan && Vulkan::IsVulkanLibraryLoaded()) + { + GetAdapterAndModeList(&ret, VulkanDevice::GetInstance().m_instance); + } + else + { + if (Vulkan::LoadVulkanLibrary()) + { + ScopedGuard lib_guard([]() { Vulkan::UnloadVulkanLibrary(); }); + const VkInstance instance = CreateVulkanInstance(WindowInfo(), false, false); + if (instance != VK_NULL_HANDLE) + { + if (Vulkan::LoadVulkanInstanceFunctions(instance)) + GetAdapterAndModeList(&ret, instance); + + vkDestroyInstance(instance, nullptr); + } + } + } + + return ret; +} + +GPUDevice::AdapterAndModeList VulkanDevice::GetAdapterAndModeList() +{ + AdapterAndModeList ret; + GetAdapterAndModeList(&ret, m_instance); + return ret; +} + +bool VulkanDevice::IsSuitableDefaultRenderer() +{ +#ifdef __ANDROID__ + // No way in hell. + return false; +#else + AdapterAndModeList aml = StaticGetAdapterAndModeList(); + if (aml.adapter_names.empty()) + { + // No adapters, not gonna be able to use VK. + return false; + } + + // Check the first GPU, should be enough. 
+ const std::string& name = aml.adapter_names.front(); + Log_InfoPrintf(fmt::format("Using Vulkan GPU '{}' for automatic renderer check.", name).c_str()); + + // Any software rendering (LLVMpipe, SwiftShader). + if (StringUtil::StartsWithNoCase(name, "llvmpipe") || StringUtil::StartsWithNoCase(name, "SwiftShader")) + { + Log_InfoPrintf("Not using Vulkan for software renderer."); + return false; + } + + // For Intel, OpenGL usually ends up faster on Linux, because of fbfetch. + // Plus, the Ivy Bridge and Haswell drivers are incomplete. + if (StringUtil::StartsWithNoCase(name, "Intel")) + { + Log_InfoPrintf("Not using Vulkan for Intel GPU."); + return false; + } + + Log_InfoPrintf("Allowing Vulkan as default renderer."); + return true; +#endif +} + +RenderAPI VulkanDevice::GetRenderAPI() const +{ + return RenderAPI::Vulkan; +} + +bool VulkanDevice::HasSurface() const +{ + return static_cast(m_swap_chain); +} + +bool VulkanDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation) +{ + std::unique_lock lock(s_instance_mutex); + bool enable_debug_utils = m_debug_device; + bool enable_validation_layer = m_debug_device; + std::optional exclusive_fullscreen_control; + + if (!Vulkan::LoadVulkanLibrary()) + { + Host::ReportErrorAsync("Error", "Failed to load Vulkan library. Does your GPU and/or driver support Vulkan?"); + return false; + } + + m_instance = CreateVulkanInstance(m_window_info, enable_debug_utils, enable_validation_layer); + if (m_instance == VK_NULL_HANDLE) + { + if (enable_debug_utils || enable_validation_layer) + { + // Try again without the validation layer. + enable_debug_utils = false; + enable_validation_layer = false; + m_instance = CreateVulkanInstance(m_window_info, enable_debug_utils, enable_validation_layer); + if (m_instance == VK_NULL_HANDLE) + { + Host::ReportErrorAsync("Error", + "Failed to create Vulkan instance. 
Does your GPU and/or driver support Vulkan?"); + return false; + } + + Log_ErrorPrintf("Vulkan validation/debug layers requested but are unavailable. Creating non-debug device."); + } + } + + if (!Vulkan::LoadVulkanInstanceFunctions(m_instance)) + { + Log_ErrorPrintf("Failed to load Vulkan instance functions"); + return false; + } + + GPUList gpus = EnumerateGPUs(m_instance); + if (gpus.empty()) + { + Host::ReportErrorAsync("Error", "No physical devices found. Does your GPU and/or driver support Vulkan?"); + return false; + } + + if (!adapter.empty()) + { + u32 gpu_index = 0; + for (; gpu_index < static_cast(gpus.size()); gpu_index++) + { + Log_InfoPrint(fmt::format("GPU {}: {}", gpu_index, gpus[gpu_index].second).c_str()); + if (gpus[gpu_index].second == adapter) + { + m_physical_device = gpus[gpu_index].first; + break; + } + } + + if (gpu_index == static_cast(gpus.size())) + { + Log_WarningPrint(fmt::format("Requested GPU '{}' not found, using first ({})", adapter, gpus[0].second).c_str()); + m_physical_device = gpus[0].first; + } + } + else + { + Log_InfoPrint(fmt::format("No GPU requested, using first ({})", gpus[0].second).c_str()); + m_physical_device = gpus[0].first; + } + + // Read device physical memory properties, we need it for allocating buffers + vkGetPhysicalDeviceProperties(m_physical_device, &m_device_properties); + m_device_properties.limits.minUniformBufferOffsetAlignment = + std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast(1)); + m_device_properties.limits.minTexelBufferOffsetAlignment = + std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast(1)); + m_device_properties.limits.optimalBufferCopyOffsetAlignment = + std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast(1)); + m_device_properties.limits.optimalBufferCopyRowPitchAlignment = + std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast(1)); + 
m_device_properties.limits.bufferImageGranularity = + std::max(m_device_properties.limits.bufferImageGranularity, static_cast(1)); + + if (enable_debug_utils) + EnableDebugUtils(); + + VkSurfaceKHR surface = VK_NULL_HANDLE; + ScopedGuard surface_cleanup = [this, &surface]() { + if (surface != VK_NULL_HANDLE) + vkDestroySurfaceKHR(m_instance, surface, nullptr); + }; + if (m_window_info.type != WindowInfo::Type::Surfaceless) + { + surface = VulkanSwapChain::CreateVulkanSurface(m_instance, m_physical_device, &m_window_info); + if (surface == VK_NULL_HANDLE) + return false; + } + + // Attempt to create the device. + if (!CreateDevice(surface, enable_validation_layer)) + return false; + + if (!CheckFeatures()) + { + Host::ReportErrorAsync("Error", "Your GPU does not support the required Vulkan features."); + return false; + } + + // And critical resources. + if (!CreateAllocator() || !CreatePersistentDescriptorPool() || !CreateCommandBuffers() || !CreatePipelineLayouts()) + return false; + + if (threaded_presentation) + StartPresentThread(); + + if (surface != VK_NULL_HANDLE) + { + m_swap_chain = VulkanSwapChain::Create(m_window_info, surface, m_vsync_enabled, exclusive_fullscreen_control); + if (!m_swap_chain) + { + Log_ErrorPrintf("Failed to create swap chain"); + return false; + } + + // NOTE: This is assigned afterwards, because some platforms can modify the window info (e.g. Metal). + m_window_info = m_swap_chain->GetWindowInfo(); + } + + surface_cleanup.Cancel(); + + // Render a frame as soon as possible to clear out whatever was previously being displayed. 
+ if (m_window_info.type != WindowInfo::Type::Surfaceless) + RenderBlankFrame(); + + if (!CreateNullTexture()) + { + Log_ErrorPrint("Failed to create dummy texture"); + return false; + } + + if (!CreateBuffers() || !CreatePersistentDescriptorSets()) + return false; + + return true; +} + +void VulkanDevice::DestroyDevice() +{ + std::unique_lock lock(s_instance_mutex); + + if (InRenderPass()) + EndRenderPass(); + + // Don't both submitting the current command buffer, just toss it. + if (m_device != VK_NULL_HANDLE) + WaitForGPUIdle(); + + StopPresentThread(); + m_swap_chain.reset(); + + if (m_null_texture) + { + m_null_texture->Destroy(false); + m_null_texture.reset(); + } + for (auto& it : m_cleanup_objects) + it.second(); + m_cleanup_objects.clear(); + DestroyDownloadBuffer(); + DestroyPersistentDescriptorSets(); + DestroyBuffers(); + DestroySamplers(); + + DestroyPersistentDescriptorPool(); + DestroyPipelineLayouts(); + DestroyCommandBuffers(); + DestroyAllocator(); + + for (auto& it : m_render_pass_cache) + vkDestroyRenderPass(m_device, it.second, nullptr); + m_render_pass_cache.clear(); + + if (m_pipeline_cache != VK_NULL_HANDLE) + { + vkDestroyPipelineCache(m_device, m_pipeline_cache, nullptr); + m_pipeline_cache = VK_NULL_HANDLE; + } + + if (m_device != VK_NULL_HANDLE) + { + vkDestroyDevice(m_device, nullptr); + m_device = VK_NULL_HANDLE; + } + + if (m_debug_messenger_callback != VK_NULL_HANDLE) + DisableDebugUtils(); + + if (m_instance != VK_NULL_HANDLE) + { + vkDestroyInstance(m_instance, nullptr); + m_instance = VK_NULL_HANDLE; + } + + Vulkan::UnloadVulkanLibrary(); +} + +bool VulkanDevice::ValidatePipelineCacheHeader(const VK_PIPELINE_CACHE_HEADER& header) +{ + if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) + { + Log_ErrorPrintf("Pipeline cache failed validation: Invalid header length"); + return false; + } + + if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) + { + Log_ErrorPrintf("Pipeline cache failed validation: Invalid 
header version"); + return false; + } + + if (header.vendor_id != m_device_properties.vendorID) + { + Log_ErrorPrintf("Pipeline cache failed validation: Incorrect vendor ID (file: 0x%X, device: 0x%X)", + header.vendor_id, m_device_properties.vendorID); + return false; + } + + if (header.device_id != m_device_properties.deviceID) + { + Log_ErrorPrintf("Pipeline cache failed validation: Incorrect device ID (file: 0x%X, device: 0x%X)", + header.device_id, m_device_properties.deviceID); + return false; + } + + if (std::memcmp(header.uuid, m_device_properties.pipelineCacheUUID, VK_UUID_SIZE) != 0) + { + Log_ErrorPrintf("Pipeline cache failed validation: Incorrect UUID"); + return false; + } + + return true; +} + +void VulkanDevice::FillPipelineCacheHeader(VK_PIPELINE_CACHE_HEADER* header) +{ + header->header_length = sizeof(VK_PIPELINE_CACHE_HEADER); + header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; + header->vendor_id = m_device_properties.vendorID; + header->device_id = m_device_properties.deviceID; + std::memcpy(header->uuid, m_device_properties.pipelineCacheUUID, VK_UUID_SIZE); +} + +bool VulkanDevice::ReadPipelineCache(const std::string& filename) +{ + std::optional> data; + + auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "rb"); + if (fp) + { + data = FileSystem::ReadBinaryFile(fp.get()); + + if (data.has_value()) + { + if (data->size() < sizeof(VK_PIPELINE_CACHE_HEADER)) + { + Log_ErrorPrintf("Pipeline cache at '%s' is too small", filename.c_str()); + return false; + } + + VK_PIPELINE_CACHE_HEADER header; + std::memcpy(&header, data->data(), sizeof(header)); + if (!ValidatePipelineCacheHeader(header)) + data.reset(); + } + } + + const VkPipelineCacheCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, nullptr, 0, + data.has_value() ? data->size() : 0, data.has_value() ? 
data->data() : nullptr}; + VkResult res = vkCreatePipelineCache(m_device, &ci, nullptr, &m_pipeline_cache); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreatePipelineCache() failed: "); + return false; + } + + return true; +} + +bool VulkanDevice::GetPipelineCacheData(DynamicHeapArray* data) +{ + if (m_pipeline_cache == VK_NULL_HANDLE) + return false; + + size_t data_size; + VkResult res = vkGetPipelineCacheData(m_device, m_pipeline_cache, &data_size, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() failed: "); + return false; + } + + data->resize(data_size); + res = vkGetPipelineCacheData(m_device, m_pipeline_cache, &data_size, data->data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() (2) failed: "); + return false; + } + + data->resize(data_size); + return true; +} + +bool VulkanDevice::UpdateWindow() +{ + DestroySurface(); + + if (!AcquireWindow(false)) + return false; + + if (m_window_info.IsSurfaceless()) + return true; + + // make sure previous frames are presented + if (InRenderPass()) + EndRenderPass(); + SubmitCommandBuffer(false); + WaitForGPUIdle(); + + // recreate surface in existing swap chain if it already exists + if (m_swap_chain) + { + if (m_swap_chain->RecreateSurface(m_window_info)) + { + m_window_info = m_swap_chain->GetWindowInfo(); + return true; + } + + m_swap_chain.reset(); + } + + VkSurfaceKHR surface = VulkanSwapChain::CreateVulkanSurface(m_instance, m_physical_device, &m_window_info); + if (surface == VK_NULL_HANDLE) + { + Log_ErrorPrintf("Failed to create new surface for swap chain"); + return false; + } + + // TODO: exclusive fullscreen control + m_swap_chain = VulkanSwapChain::Create(m_window_info, surface, m_vsync_enabled, std::nullopt); + if (!m_swap_chain) + { + Log_ErrorPrintf("Failed to create swap chain"); + VulkanSwapChain::DestroyVulkanSurface(m_instance, &m_window_info, surface); + return false; + } + + m_window_info = 
m_swap_chain->GetWindowInfo(); + RenderBlankFrame(); + return true; +} + +void VulkanDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) +{ + if (m_swap_chain->GetWidth() == static_cast(new_window_width) && + m_swap_chain->GetHeight() == static_cast(new_window_height)) + { + // skip unnecessary resizes + m_window_info.surface_scale = new_window_scale; + return; + } + + // make sure previous frames are presented + WaitForGPUIdle(); + + if (!m_swap_chain->ResizeSwapChain(new_window_width, new_window_height, new_window_scale)) + { + // AcquireNextImage() will fail, and we'll recreate the surface. + Log_ErrorPrintf("Failed to resize swap chain. Next present will fail."); + return; + } + + m_window_info = m_swap_chain->GetWindowInfo(); +} + +void VulkanDevice::DestroySurface() +{ + WaitForGPUIdle(); + m_swap_chain.reset(); +} + +bool VulkanDevice::SupportsTextureFormat(GPUTexture::Format format) const +{ + return (TEXTURE_FORMAT_MAPPING[static_cast(format)] != VK_FORMAT_UNDEFINED); +} + +std::string VulkanDevice::GetDriverInfo() const +{ + std::string ret; + const u32 api_version = m_device_properties.apiVersion; + const u32 driver_version = m_device_properties.driverVersion; + if (m_optional_extensions.vk_khr_driver_properties) + { + const VkPhysicalDeviceDriverProperties& props = m_device_driver_properties; + ret = StringUtil::StdStringFromFormat( + "Driver %u.%u.%u\nVulkan %u.%u.%u\nConformance Version %u.%u.%u.%u\n%s\n%s\n%s", VK_VERSION_MAJOR(driver_version), + VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), VK_API_VERSION_MAJOR(api_version), + VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version), props.conformanceVersion.major, + props.conformanceVersion.minor, props.conformanceVersion.subminor, props.conformanceVersion.patch, + props.driverInfo, props.driverName, m_device_properties.deviceName); + } + else + { + ret = StringUtil::StdStringFromFormat("Driver %u.%u.%u\nVulkan %u.%u.%u\n%s", 
VK_VERSION_MAJOR(driver_version), + VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), + VK_API_VERSION_MAJOR(api_version), VK_API_VERSION_MINOR(api_version), + VK_API_VERSION_PATCH(api_version), m_device_properties.deviceName); + } + + return ret; +} + +void VulkanDevice::SetVSync(bool enabled) +{ + if (!m_swap_chain || m_vsync_enabled == enabled) + return; + + // This swap chain should not be used by the current buffer, thus safe to destroy. + WaitForGPUIdle(); + if (!m_swap_chain->SetVSync(enabled)) + { + // Try switching back to the old mode.. + if (!m_swap_chain->SetVSync(m_vsync_enabled)) + { + Panic("Failed to reset old vsync mode after failure"); + m_swap_chain.reset(); + } + } + + m_vsync_enabled = enabled; +} + +bool VulkanDevice::BeginPresent(bool frame_skip) +{ + if (InRenderPass()) + EndRenderPass(); + + if (frame_skip) + return false; + + // If we're running surfaceless, kick the command buffer so we don't run out of descriptors. + if (!m_swap_chain) + { + SubmitCommandBuffer(false); + return false; + } + + // Previous frame needs to be presented before we can acquire the swap chain. + WaitForPresentComplete(); + + // Check if the device was lost. + if (CheckLastSubmitFail()) + { + Panic("Fixme"); // TODO + return false; + } + + VkResult res = m_swap_chain->AcquireNextImage(); + if (res != VK_SUCCESS) + { + m_swap_chain->ReleaseCurrentImage(); + + if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) + { + ResizeWindow(0, 0, m_window_info.surface_scale); + res = m_swap_chain->AcquireNextImage(); + } + else if (res == VK_ERROR_SURFACE_LOST_KHR) + { + Log_WarningPrintf("Surface lost, attempting to recreate"); + if (!m_swap_chain->RecreateSurface(m_window_info)) + { + Log_ErrorPrintf("Failed to recreate surface after loss"); + SubmitCommandBuffer(false); + return false; + } + + res = m_swap_chain->AcquireNextImage(); + } + + // This can happen when multiple resize events happen in quick succession. 
+ // In this case, just wait until the next frame to try again. + if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) + { + // Still submit the command buffer, otherwise we'll end up with several frames waiting. + LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR() failed: "); + SubmitCommandBuffer(false); + return false; + } + } + + BeginSwapChainRenderPass(); + return true; +} + +void VulkanDevice::EndPresent() +{ + DebugAssert(InRenderPass() && !m_current_framebuffer); + EndRenderPass(); + + VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, m_swap_chain->GetCurrentImage(), GPUTexture::Type::RenderTarget, 0, 1, + 0, 1, VulkanTexture::Layout::ColorAttachment, + VulkanTexture::Layout::PresentSrc); + SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing()); + MoveToNextCommandBuffer(); + InvalidateCachedState(); +} + +#ifdef _DEBUG +static std::array Palette(float phase, const std::array& a, const std::array& b, + const std::array& c, const std::array& d) +{ + std::array result; + result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0])); + result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1])); + result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2])); + return result; +} +#endif + +void VulkanDevice::PushDebugGroup(const char* fmt, ...) 
+{ +#ifdef _DEBUG + if (!vkCmdBeginDebugUtilsLabelEXT || !m_debug_device) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + const std::array color = Palette(static_cast(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, + {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f}); + + const VkDebugUtilsLabelEXT label = { + VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + nullptr, + buf.c_str(), + {color[0], color[1], color[2], 1.0f}, + }; + vkCmdBeginDebugUtilsLabelEXT(GetCurrentCommandBuffer(), &label); +#endif +} + +void VulkanDevice::PopDebugGroup() +{ +#ifdef _DEBUG + if (!vkCmdEndDebugUtilsLabelEXT || !m_debug_device) + return; + + s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u); + + vkCmdEndDebugUtilsLabelEXT(GetCurrentCommandBuffer()); +#endif +} + +void VulkanDevice::InsertDebugMessage(const char* fmt, ...) +{ +#ifdef _DEBUG + if (!vkCmdInsertDebugUtilsLabelEXT || !m_debug_device) + return; + + std::va_list ap; + va_start(ap, fmt); + const std::string buf(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + if (buf.empty()) + return; + + const VkDebugUtilsLabelEXT label = { + VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, buf.c_str(), {0.0f, 0.0f, 0.0f, 1.0f}}; + vkCmdInsertDebugUtilsLabelEXT(GetCurrentCommandBuffer(), &label); +#endif +} + +bool VulkanDevice::CheckFeatures() +{ + m_max_texture_size = m_device_properties.limits.maxImageDimension2D; + + VkImageFormatProperties color_properties = {}; + vkGetPhysicalDeviceImageFormatProperties(m_physical_device, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, + &color_properties); + VkImageFormatProperties depth_properties = {}; + vkGetPhysicalDeviceImageFormatProperties(m_physical_device, VK_FORMAT_D32_SFLOAT, VK_IMAGE_TYPE_2D, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, + &depth_properties); + 
const VkSampleCountFlags combined_properties = m_device_properties.limits.framebufferColorSampleCounts & + m_device_properties.limits.framebufferDepthSampleCounts & + color_properties.sampleCounts & depth_properties.sampleCounts; + if (combined_properties & VK_SAMPLE_COUNT_64_BIT) + m_max_multisamples = 64; + else if (combined_properties & VK_SAMPLE_COUNT_32_BIT) + m_max_multisamples = 32; + else if (combined_properties & VK_SAMPLE_COUNT_16_BIT) + m_max_multisamples = 16; + else if (combined_properties & VK_SAMPLE_COUNT_8_BIT) + m_max_multisamples = 8; + else if (combined_properties & VK_SAMPLE_COUNT_4_BIT) + m_max_multisamples = 4; + else if (combined_properties & VK_SAMPLE_COUNT_2_BIT) + m_max_multisamples = 2; + else + m_max_multisamples = 1; + + m_features.dual_source_blend = m_device_features.dualSrcBlend; // TODO: Option to disable + + if (!m_features.dual_source_blend) + Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance."); + + m_features.noperspective_interpolation = true; + m_features.per_sample_shading = true; + m_features.supports_texture_buffers = true; + +#ifdef __APPLE__ + // Partial texture buffer uploads appear to be broken in macOS/MoltenVK. 
+ m_features.texture_buffers_emulated_with_ssbo = true; +#else + const u32 max_texel_buffer_elements = m_device_properties.limits.maxTexelBufferElements; + Log_InfoPrintf("Max texel buffer elements: %u", max_texel_buffer_elements); + if (max_texel_buffer_elements < MIN_TEXEL_BUFFER_ELEMENTS) + { + m_features.texture_buffers_emulated_with_ssbo = true; + } +#endif + + if (m_features.texture_buffers_emulated_with_ssbo) + Log_WarningPrintf("Emulating texture buffers with SSBOs."); + + m_features.partial_msaa_resolve = true; + m_features.shader_cache = true; + m_features.pipeline_cache = true; + + return true; +} + +void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) +{ + VulkanTexture* const S = static_cast(src); + VulkanTexture* const D = static_cast(dst); + + if (S->GetState() == GPUTexture::State::Cleared) + { + // source is cleared. if destination is a render target, we can carry the clear forward + if (D->IsRenderTargetOrDepthStencil()) + { + if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight()) + { + // pass it forward if we're clearing the whole thing + if (S->IsDepthStencil()) + D->SetClearDepth(S->GetClearDepth()); + else + D->SetClearColor(S->GetClearColor()); + + return; + } + + if (D->GetState() == GPUTexture::State::Cleared) + { + // destination is cleared, if it's the same colour and rect, we can just avoid this entirely + if (D->IsDepthStencil()) + { + if (D->GetClearDepth() == S->GetClearDepth()) + return; + } + else + { + if (D->GetClearColor() == S->GetClearColor()) + return; + } + } + + // TODO: Could use attachment clear here.. 
+ } + + // commit the clear to the source first, then do normal copy + S->CommitClear(); + } + + // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first + // (the area outside of where we're copying to) + if (D->GetState() == GPUTexture::State::Cleared && + (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight())) + { + D->CommitClear(); + } + + // *now* we can do a normal image copy. + const VkImageAspectFlags src_aspect = (S->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageAspectFlags dst_aspect = (D->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageCopy ic = {{src_aspect, src_level, src_layer, 1u}, + {static_cast(src_x), static_cast(src_y), 0}, + {dst_aspect, dst_level, dst_layer, 1u}, + {static_cast(dst_x), static_cast(dst_y), 0}, + {static_cast(width), static_cast(height), 1u}}; + + if (InRenderPass()) + EndRenderPass(); + + S->SetUseFenceCounter(GetCurrentFenceCounter()); + D->SetUseFenceCounter(GetCurrentFenceCounter()); + S->TransitionToLayout((D == S) ? VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferSrc); + D->TransitionToLayout((D == S) ? 
VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferDst); + + vkCmdCopyImage(GetCurrentCommandBuffer(), S->GetImage(), S->GetVkLayout(), D->GetImage(), D->GetVkLayout(), 1, &ic); + + D->SetState(GPUTexture::State::Dirty); +} + +void VulkanDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) +{ + DebugAssert((src_x + width) <= src->GetWidth()); + DebugAssert((src_y + height) <= src->GetHeight()); + DebugAssert(src->IsMultisampled()); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); + DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + + if (InRenderPass()) + EndRenderPass(); + + VulkanTexture* D = static_cast(dst); + VulkanTexture* S = static_cast(src); + const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + + S->CommitClear(cmdbuf); + D->CommitClear(cmdbuf); + + S->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, S->GetLayout(), VulkanTexture::Layout::TransferSrc); + D->TransitionSubresourcesToLayout(cmdbuf, dst_layer, 1, dst_level, 1, D->GetLayout(), + VulkanTexture::Layout::TransferSrc); + + const VkImageResolve resolve = {{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, + {static_cast(src_x), static_cast(src_y), 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, dst_layer, 1u}, + {static_cast(dst_x), static_cast(dst_y), 0}, + {width, height, 1}}; + vkCmdResolveImage(cmdbuf, S->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, D->GetImage(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &resolve); + + S->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VulkanTexture::Layout::TransferSrc, S->GetLayout()); + D->TransitionSubresourcesToLayout(cmdbuf, dst_layer, 1, dst_level, 1, VulkanTexture::Layout::TransferSrc, + D->GetLayout()); +} + +void 
VulkanDevice::ClearRenderTarget(GPUTexture* t, u32 c) +{ + GPUDevice::ClearRenderTarget(t, c); + if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t) + EndRenderPass(); +} + +void VulkanDevice::ClearDepth(GPUTexture* t, float d) +{ + GPUDevice::ClearDepth(t, d); + if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t) + EndRenderPass(); +} + +void VulkanDevice::InvalidateRenderTarget(GPUTexture* t) +{ + GPUDevice::InvalidateRenderTarget(t); + if (InRenderPass() && m_current_framebuffer && + (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) + { + EndRenderPass(); + } +} + +bool VulkanDevice::CreateBuffers() +{ + if (!m_vertex_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate vertex buffer"); + return false; + } + + if (!m_index_buffer.Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate index buffer"); + return false; + } + + if (!m_uniform_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VERTEX_UNIFORM_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate uniform buffer"); + return false; + } + + if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE)) + { + Log_ErrorPrint("Failed to allocate texture upload buffer"); + return false; + } + + return true; +} + +void VulkanDevice::DestroyBuffers() +{ + m_texture_upload_buffer.Destroy(false); + m_uniform_buffer.Destroy(false); + m_index_buffer.Destroy(false); + m_vertex_buffer.Destroy(false); +} + +void VulkanDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) +{ + const u32 req_size = vertex_size * vertex_count; + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + { + SubmitCommandBufferAndRestartRenderPass("out of vertex space"); + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + Panic("Failed to 
allocate vertex space"); + } + + *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); + *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; + *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; +} + +void VulkanDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) +{ + m_vertex_buffer.CommitMemory(vertex_size * vertex_count); +} + +void VulkanDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) +{ + const u32 req_size = sizeof(DrawIndex) * index_count; + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + { + SubmitCommandBufferAndRestartRenderPass("out of index space"); + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + Panic("Failed to allocate index space"); + } + + *map_ptr = reinterpret_cast(m_index_buffer.GetCurrentHostPointer()); + *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); + *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); +} + +void VulkanDevice::UnmapIndexBuffer(u32 used_index_count) +{ + m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); +} + +void VulkanDevice::PushUniformBuffer(const void* data, u32 data_size) +{ + DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); + vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(), UNIFORM_PUSH_CONSTANTS_STAGES, 0, + data_size, data); +} + +void* VulkanDevice::MapUniformBuffer(u32 size) +{ + const u32 align = static_cast(m_device_properties.limits.minUniformBufferOffsetAlignment); + const u32 used_space = Common::AlignUpPow2(size, align); + if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, align)) + { + SubmitCommandBufferAndRestartRenderPass("out of uniform space"); + if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, align)) + Panic("Failed to allocate uniform space."); + } + + return m_uniform_buffer.GetCurrentHostPointer(); +} + +void 
VulkanDevice::UnmapUniformBuffer(u32 size) +{ + m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); + m_uniform_buffer.CommitMemory(size); + m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; +} + +bool VulkanDevice::CreateNullTexture() +{ + m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, + VK_FORMAT_R8G8B8A8_UNORM); + if (!m_null_texture) + return false; + + const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + const VkImageSubresourceRange srr{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; + const VkClearColorValue ccv{}; + m_null_texture->TransitionToLayout(cmdbuf, VulkanTexture::Layout::ClearDst); + vkCmdClearColorImage(cmdbuf, m_null_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &ccv, 1, &srr); + m_null_texture->TransitionToLayout(cmdbuf, VulkanTexture::Layout::General); + Vulkan::SetObjectName(m_device, m_null_texture->GetImage(), "Null texture"); + Vulkan::SetObjectName(m_device, m_null_texture->GetView(), "Null texture view"); + + // Bind null texture and point sampler state to all. 
+ const VkSampler point_sampler = GetSampler(GPUSampler::GetNearestConfig()); + if (point_sampler == VK_NULL_HANDLE) + return false; + + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + m_current_textures[i] = m_null_texture.get(); + m_current_samplers[i] = point_sampler; + } + + return true; +} + +bool VulkanDevice::CreatePipelineLayouts() +{ + Vulkan::DescriptorSetLayoutBuilder dslb; + Vulkan::PipelineLayoutBuilder plb; + + { + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_ubo_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_ubo_ds_layout, "UBO Descriptor Set Layout"); + } + + { + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout"); + } + + { + dslb.AddBinding(0, + m_features.texture_buffers_emulated_with_ssbo ? 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_single_texture_buffer_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_single_texture_buffer_ds_layout, "Texture Buffer Descriptor Set Layout"); + } + + { + if (m_optional_extensions.vk_khr_push_descriptor) + dslb.SetPushFlag(); + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout"); + } + + { + VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; + plb.AddDescriptorSet(m_ubo_ds_layout); + plb.AddDescriptorSet(m_single_texture_ds_layout); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); + } + + { + VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; + plb.AddDescriptorSet(m_single_texture_ds_layout); + plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout"); + } + + { + VkPipelineLayout& pl = + m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; + plb.AddDescriptorSet(m_single_texture_buffer_ds_layout); + plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout"); + } + + { + VkPipelineLayout& pl = 
m_pipeline_layouts[static_cast(GPUPipeline::Layout::MultiTextureAndUBO)]; + plb.AddDescriptorSet(m_ubo_ds_layout); + plb.AddDescriptorSet(m_multi_texture_ds_layout); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout"); + } + + { + VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)]; + plb.AddDescriptorSet(m_multi_texture_ds_layout); + plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout"); + } + + return true; +} + +void VulkanDevice::DestroyPipelineLayouts() +{ + for (VkPipelineLayout& pl : m_pipeline_layouts) + { + if (pl != VK_NULL_HANDLE) + { + vkDestroyPipelineLayout(m_device, pl, nullptr); + pl = VK_NULL_HANDLE; + } + } + + auto destroy_dsl = [this](VkDescriptorSetLayout& l) { + if (l != VK_NULL_HANDLE) + { + vkDestroyDescriptorSetLayout(m_device, l, nullptr); + l = VK_NULL_HANDLE; + } + }; + destroy_dsl(m_multi_texture_ds_layout); + destroy_dsl(m_single_texture_buffer_ds_layout); + destroy_dsl(m_single_texture_ds_layout); + destroy_dsl(m_ubo_ds_layout); +} + +bool VulkanDevice::CreatePersistentDescriptorSets() +{ + Vulkan::DescriptorSetUpdateBuilder dsub; + + // TODO: is this a bad thing? choosing an upper bound.. 
so long as it's not going to fetch all of it :/ + m_ubo_descriptor_set = AllocatePersistentDescriptorSet(m_ubo_ds_layout); + if (m_ubo_descriptor_set == VK_NULL_HANDLE) + return false; + dsub.AddBufferDescriptorWrite(m_ubo_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + m_uniform_buffer.GetBuffer(), 0, MAX_UNIFORM_BUFFER_SIZE); + dsub.Update(m_device, false); + + return true; +} + +void VulkanDevice::DestroyPersistentDescriptorSets() +{ + if (m_ubo_descriptor_set != VK_NULL_HANDLE) + FreePersistentDescriptorSet(m_ubo_descriptor_set); +} + +void VulkanDevice::RenderBlankFrame() +{ + VkResult res = m_swap_chain->AcquireNextImage(); + if (res != VK_SUCCESS) + { + Log_ErrorPrintf("Failed to acquire image for blank frame present"); + return; + } + + VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + + const VkImage image = m_swap_chain->GetCurrentImage(); + static constexpr VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, + VulkanTexture::Layout::Undefined, VulkanTexture::Layout::TransferDst); + vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &s_present_clear_color.color, 1, &srr); + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, + VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc); + + SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing()); + MoveToNextCommandBuffer(); + + InvalidateCachedState(); +} + +void VulkanDevice::SetFramebuffer(GPUFramebuffer* fb) +{ + if (m_current_framebuffer == fb) + return; + + if (InRenderPass()) + EndRenderPass(); + + m_current_framebuffer = static_cast(fb); +} + +void VulkanDevice::BeginRenderPass() +{ + DebugAssert(!InRenderPass()); + + VkRenderPassBeginInfo bi = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr}; + std::array clear_values; + + if 
(LIKELY(m_current_framebuffer)) + { + VkFormat rt_format = VK_FORMAT_UNDEFINED; + VkFormat ds_format = VK_FORMAT_UNDEFINED; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + VkAttachmentLoadOp rt_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + VkAttachmentStoreOp rt_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + VkAttachmentLoadOp ds_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + VkAttachmentStoreOp ds_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; + + VulkanTexture* rt = static_cast(m_current_framebuffer->GetRT()); + if (rt) + { + samples = static_cast(rt->GetSamples()); + rt_format = rt->GetVkFormat(); + rt_store_op = VK_ATTACHMENT_STORE_OP_STORE; + + switch (rt->GetState()) + { + case GPUTexture::State::Cleared: + { + std::memcpy(clear_values[0].color.float32, rt->GetUNormClearColor().data(), + sizeof(clear_values[0].color.float32)); + rt_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; + rt->SetState(GPUTexture::State::Dirty); + bi.pClearValues = clear_values.data(); + bi.clearValueCount = 1; + } + break; + + case GPUTexture::State::Invalidated: + { + // already DONT_CARE + rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + rt_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + } + break; + + default: + UnreachableCode(); + break; + } + + rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); + rt->SetUseFenceCounter(GetCurrentFenceCounter()); + } + + VulkanTexture* ds = static_cast(m_current_framebuffer->GetDS()); + if (ds) + { + samples = static_cast(ds->GetSamples()); + ds_format = ds->GetVkFormat(); + ds_store_op = VK_ATTACHMENT_STORE_OP_STORE; + + switch (ds->GetState()) + { + case GPUTexture::State::Cleared: + { + const u32 idx = rt ? 
1 : 0; + clear_values[idx].depthStencil = {ds->GetClearDepth()}; + ds_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; + ds->SetState(GPUTexture::State::Dirty); + bi.pClearValues = clear_values.data(); + bi.clearValueCount = idx + 1; + } + break; + + case GPUTexture::State::Invalidated: + { + // already DONT_CARE + ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + ds_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + } + break; + + default: + UnreachableCode(); + break; + } + + ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); + ds->SetUseFenceCounter(GetCurrentFenceCounter()); + } + + bi.framebuffer = m_current_framebuffer->GetFramebuffer(); + bi.renderPass = m_current_render_pass = + GetRenderPass(rt_format, ds_format, samples, rt_load_op, rt_store_op, ds_load_op, ds_store_op); + bi.renderArea.extent = {m_current_framebuffer->GetWidth(), m_current_framebuffer->GetHeight()}; + } + else + { + // Re-rendering to swap chain. + bi.framebuffer = m_swap_chain->GetCurrentFramebuffer(); + bi.renderPass = m_current_render_pass = + GetRenderPass(m_swap_chain->GetImageFormat(), VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, + VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE); + bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}; + } + + DebugAssert(m_current_render_pass); + + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + + // TODO: Stats + vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); + + // If this is a new command buffer, bind the pipeline and such. 
+ if (m_dirty_flags & DIRTY_FLAG_INITIAL) + SetInitialPipelineState(); +} + +void VulkanDevice::BeginSwapChainRenderPass() +{ + DebugAssert(!InRenderPass()); + + const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + const VkImage swap_chain_image = m_swap_chain->GetCurrentImage(); + + // Swap chain images start in undefined + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, + VulkanTexture::Layout::Undefined, + VulkanTexture::Layout::ColorAttachment); + + // All textures should be in shader read only optimal already, but just in case.. + const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); + for (u32 i = 0; i < num_textures; i++) + m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + + const VkRenderPass render_pass = + GetRenderPass(m_swap_chain->GetImageFormat(), VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, + VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE); + DebugAssert(render_pass); + + const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + render_pass, + m_swap_chain->GetCurrentFramebuffer(), + {{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}, + 1u, + &s_present_clear_color}; + vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); + m_current_render_pass = render_pass; + m_current_framebuffer = nullptr; + + // Clear pipeline, it's likely incompatible. 
+ m_current_pipeline = nullptr; +} + +bool VulkanDevice::InRenderPass() +{ + return m_current_render_pass != VK_NULL_HANDLE; +} + +void VulkanDevice::EndRenderPass() +{ + DebugAssert(m_current_render_pass != VK_NULL_HANDLE); + + // TODO: stats + m_current_render_pass = VK_NULL_HANDLE; + + vkCmdEndRenderPass(GetCurrentCommandBuffer()); +} + +void VulkanDevice::UnbindFramebuffer(VulkanFramebuffer* fb) +{ + if (m_current_framebuffer != fb) + return; + + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void VulkanDevice::UnbindFramebuffer(VulkanTexture* tex) +{ + if (!m_current_framebuffer) + return; + + if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) + return; + + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void VulkanDevice::SetPipeline(GPUPipeline* pipeline) +{ + // First draw? Bind everything. + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + { + m_current_pipeline = static_cast(pipeline); + if (!m_current_pipeline) + return; + + SetInitialPipelineState(); + return; + } + else if (m_current_pipeline == pipeline) + { + return; + } + + m_current_pipeline = static_cast(pipeline); + + vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline()); + + if (m_current_pipeline_layout != m_current_pipeline->GetLayout()) + { + m_current_pipeline_layout = m_current_pipeline->GetLayout(); + m_dirty_flags |= DIRTY_FLAG_PIPELINE_LAYOUT; + } +} + +void VulkanDevice::UnbindPipeline(VulkanPipeline* pl) +{ + if (m_current_pipeline != pl) + return; + + m_current_pipeline = nullptr; +} + +void VulkanDevice::InvalidateCachedState() +{ + m_dirty_flags = ALL_DIRTY_STATE; + m_current_render_pass = VK_NULL_HANDLE; + m_current_framebuffer = nullptr; + m_current_pipeline = nullptr; +} + +VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const +{ + return m_pipeline_layouts[static_cast(m_current_pipeline_layout)]; +} + +void 
VulkanDevice::SetInitialPipelineState() +{ + DebugAssert(m_current_pipeline); + m_dirty_flags &= ~DIRTY_FLAG_INITIAL; + + const VkDeviceSize offset = 0; + const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + vkCmdBindVertexBuffers(cmdbuf, 0, 1, m_vertex_buffer.GetBufferPtr(), &offset); + vkCmdBindIndexBuffer(cmdbuf, m_index_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16); + + m_current_pipeline_layout = m_current_pipeline->GetLayout(); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline()); + + const VkViewport vp = {static_cast(m_current_viewport.left), + static_cast(m_current_viewport.top), + static_cast(m_current_viewport.GetWidth()), + static_cast(m_current_viewport.GetHeight()), + 0.0f, + 1.0f}; + vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); + + const VkRect2D vrc = { + {m_current_scissor.left, m_current_scissor.top}, + {static_cast(m_current_scissor.GetWidth()), static_cast(m_current_scissor.GetHeight())}}; + vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); +} + +void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) +{ + VulkanTexture* T = static_cast(texture); + const VkSampler vsampler = sampler ? 
static_cast(sampler)->GetSampler() : VK_NULL_HANDLE; + if (m_current_textures[slot] != texture || m_current_samplers[slot] != vsampler) + { + m_current_textures[slot] = T; + m_current_samplers[slot] = vsampler; + m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; + } + + if (T) + { + T->SetUseFenceCounter(GetCurrentFenceCounter()); + if (T->GetLayout() != VulkanTexture::Layout::ShaderReadOnly) + { + if (InRenderPass()) + EndRenderPass(); + T->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + } + } +} + +void VulkanDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) +{ + DebugAssert(slot == 0); + if (m_current_texture_buffer == buffer) + return; + + m_current_texture_buffer = static_cast(buffer); + if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; +} + +void VulkanDevice::UnbindTexture(VulkanTexture* tex) +{ + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == tex) + { + m_current_textures[i] = m_null_texture.get(); + m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; + } + } +} + +void VulkanDevice::UnbindTextureBuffer(VulkanTextureBuffer* buf) +{ + if (m_current_texture_buffer != buf) + return; + + m_current_texture_buffer = nullptr; + + if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; +} + +void VulkanDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + const Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); + if (m_current_viewport == rc) + return; + + m_current_viewport = rc; + + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + return; + + const VkViewport vp = { + static_cast(x), static_cast(y), static_cast(width), static_cast(height), 0.0f, 1.0f}; + vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); +} + +void VulkanDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + const 
Common::Rectangle rc = Common::Rectangle::FromExtents(x, y, width, height); + if (m_current_scissor == rc) + return; + + m_current_scissor = rc; + + if (m_dirty_flags & DIRTY_FLAG_INITIAL) + return; + + const VkRect2D vrc = {{x, y}, {static_cast(width), static_cast(height)}}; + vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); +} + +void VulkanDevice::PreDrawCheck() +{ + DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); + const u32 dirty = std::exchange(m_dirty_flags, 0); + if (dirty != 0) + { + if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS)) + { + if (!UpdateDescriptorSets(dirty)) + { + SubmitCommandBufferAndRestartRenderPass("out of descriptor sets"); + PreDrawCheck(); + return; + } + } + } + + if (!InRenderPass()) + BeginRenderPass(); +} + +template +bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynamic_offsets) +{ + std::array ds; + u32 first_ds = 0; + u32 num_ds = 0; + + if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) + { + if (new_layout || new_dynamic_offsets) + { + ds[num_ds++] = m_ubo_descriptor_set; + new_dynamic_offsets = true; + } + } + + if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || + layout == GPUPipeline::Layout::SingleTextureAndPushConstants) + { + DebugAssert(m_current_textures[0] && m_current_samplers[0] != VK_NULL_HANDLE); + ds[num_ds++] = m_current_textures[0]->GetDescriptorSetWithSampler(m_current_samplers[0]); + } + else if constexpr (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + { + DebugAssert(m_current_texture_buffer); + ds[num_ds++] = m_current_texture_buffer->GetDescriptorSet(); + } + else if constexpr (layout == GPUPipeline::Layout::MultiTextureAndUBO || + layout == GPUPipeline::Layout::MultiTextureAndPushConstants) + { + Vulkan::DescriptorSetUpdateBuilder dsub; + + if (m_optional_extensions.vk_khr_push_descriptor) + { + for (u32 
i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); + dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, m_current_textures[i]->GetView(), + m_current_samplers[i], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline_layouts[static_cast(m_current_pipeline_layout)], false); + if (num_ds == 0) + return true; + } + else + { + VkDescriptorSet tds = AllocateDescriptorSet(m_multi_texture_ds_layout); + if (tds == VK_NULL_HANDLE) + return false; + + ds[num_ds++] = tds; + + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); + dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, m_current_textures[i]->GetView(), m_current_samplers[i], + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + dsub.Update(m_device, false); + } + } + + DebugAssert(num_ds > 0); + vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline_layouts[static_cast(m_current_pipeline_layout)], first_ds, num_ds, ds.data(), + static_cast(new_dynamic_offsets), + new_dynamic_offsets ? 
&m_uniform_buffer_position : nullptr); + + return true; +} + +bool VulkanDevice::UpdateDescriptorSets(u32 dirty) +{ + const bool new_layout = (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) != 0; + const bool new_dynamic_offsets = (dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0; + + switch (m_current_pipeline_layout) + { + case GPUPipeline::Layout::SingleTextureAndUBO: + return UpdateDescriptorSetsForLayout(new_layout, new_dynamic_offsets); + + case GPUPipeline::Layout::SingleTextureAndPushConstants: + return UpdateDescriptorSetsForLayout(new_layout, false); + + case GPUPipeline::Layout::SingleTextureBufferAndPushConstants: + return UpdateDescriptorSetsForLayout(new_layout, false); + + case GPUPipeline::Layout::MultiTextureAndUBO: + return UpdateDescriptorSetsForLayout(new_layout, new_dynamic_offsets); + + case GPUPipeline::Layout::MultiTextureAndPushConstants: + return UpdateDescriptorSetsForLayout(new_layout, false); + + default: + UnreachableCode(); + return false; + } +} + +void VulkanDevice::Draw(u32 vertex_count, u32 base_vertex) +{ + PreDrawCheck(); + vkCmdDraw(GetCurrentCommandBuffer(), vertex_count, 1, base_vertex, 0); +} + +void VulkanDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) +{ + PreDrawCheck(); + vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); +} diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h new file mode 100644 index 000000000..c72e0fba4 --- /dev/null +++ b/src/util/vulkan_device.h @@ -0,0 +1,451 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" +#include "gpu_texture.h" +#include "vulkan_loader.h" +#include "vulkan_stream_buffer.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class VulkanFramebuffer; +class VulkanPipeline; +class VulkanSwapChain; +class VulkanTexture; +class VulkanTextureBuffer; + +struct 
VK_PIPELINE_CACHE_HEADER; + +class VulkanDevice final : public GPUDevice +{ +public: + friend VulkanTexture; + + enum : u32 + { + NUM_COMMAND_BUFFERS = 3, + }; + + struct OptionalExtensions + { + bool vk_ext_memory_budget : 1; + bool vk_ext_rasterization_order_attachment_access : 1; + bool vk_ext_attachment_feedback_loop_layout : 1; + bool vk_ext_full_screen_exclusive : 1; + bool vk_khr_driver_properties : 1; + bool vk_khr_push_descriptor : 1; + }; + + static GPUTexture::Format GetFormatForVkFormat(VkFormat format); + + static const std::array(GPUTexture::Format::MaxCount)> TEXTURE_FORMAT_MAPPING; + +public: + VulkanDevice(); + ~VulkanDevice() override; + + RenderAPI GetRenderAPI() const override; + + bool HasSurface() const override; + + bool UpdateWindow() override; + void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; + + static AdapterAndModeList StaticGetAdapterAndModeList(); + AdapterAndModeList GetAdapterAndModeList() override; + void DestroySurface() override; + + std::string GetDriverInfo() const override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + + bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; + void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* 
src, + u32 src_x, u32 src_y, u32 width, u32 height) override; + void ClearRenderTarget(GPUTexture* t, u32 c) override; + void ClearDepth(GPUTexture* t, float d) override; + void InvalidateRenderTarget(GPUTexture* t) override; + + std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; + + std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, DynamicHeapArray* out_binary) override; + std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size) override; + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + + void SetVSync(bool enabled) override; 
+ + bool BeginPresent(bool skip_present) override; + void EndPresent() override; + + // Global state accessors + ALWAYS_INLINE static VulkanDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } + ALWAYS_INLINE VkInstance GetVulkanInstance() const { return m_instance; } + ALWAYS_INLINE VkDevice GetVulkanDevice() const { return m_device; } + ALWAYS_INLINE VmaAllocator GetAllocator() const { return m_allocator; } + ALWAYS_INLINE VkPhysicalDevice GetVulkanPhysicalDevice() const { return m_physical_device; } + ALWAYS_INLINE u32 GetGraphicsQueueFamilyIndex() const { return m_graphics_queue_family_index; } + ALWAYS_INLINE u32 GetPresentQueueFamilyIndex() const { return m_present_queue_family_index; } + ALWAYS_INLINE const OptionalExtensions& GetOptionalExtensions() const { return m_optional_extensions; } + + /// Returns true if Vulkan is suitable as a default for the devices in the system. + static bool IsSuitableDefaultRenderer(); + + // The interaction between raster order attachment access and fbfetch is unclear. + ALWAYS_INLINE bool UseFeedbackLoopLayout() const + { + return (m_optional_extensions.vk_ext_attachment_feedback_loop_layout && + !m_optional_extensions.vk_ext_rasterization_order_attachment_access); + } + + // Helpers for getting constants + ALWAYS_INLINE u32 GetBufferCopyOffsetAlignment() const + { + return static_cast(m_device_properties.limits.optimalBufferCopyOffsetAlignment); + } + ALWAYS_INLINE u32 GetBufferCopyRowPitchAlignment() const + { + return static_cast(m_device_properties.limits.optimalBufferCopyRowPitchAlignment); + } + + void WaitForGPUIdle(); + + // Creates a simple render pass. 
+ VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples, + VkAttachmentLoadOp color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD, + VkAttachmentStoreOp color_store_op = VK_ATTACHMENT_STORE_OP_STORE, + VkAttachmentLoadOp depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD, + VkAttachmentStoreOp depth_store_op = VK_ATTACHMENT_STORE_OP_STORE, + VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE, + bool color_feedback_loop = false, bool depth_sampling = false); + + // Gets a non-clearing version of the specified render pass. Slow, don't call in hot path. + VkRenderPass GetRenderPassForRestarting(VkRenderPass pass); + + // These command buffers are allocated per-frame. They are valid until the command buffer + // is submitted, after that you should call these functions again. + ALWAYS_INLINE VkCommandBuffer GetCurrentCommandBuffer() const { return m_current_command_buffer; } + ALWAYS_INLINE VulkanStreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; } + VkCommandBuffer GetCurrentInitCommandBuffer(); + + /// Allocates a descriptor set from the pool reserved for the current frame. + VkDescriptorSet AllocateDescriptorSet(VkDescriptorSetLayout set_layout); + + /// Allocates a descriptor set from the global pool. + VkDescriptorSet AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout); + + /// Frees a descriptor set allocated from the global pool. + void FreePersistentDescriptorSet(VkDescriptorSet set); + + // Fence "counters" are used to track which commands have been completed by the GPU. + // If the last completed fence counter is greater than or equal to N, it means that the work + // associated with counter N has been completed by the GPU. The value of N to associate with + // commands can be retrieved by calling GetCurrentFenceCounter(). 
+ u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; } + + // Gets the fence that will be signaled when the currently executing command buffer is + // queued and executed. Do not wait for this fence before the buffer is executed. + // TODO: move out of struct + u64 GetCurrentFenceCounter() const { return m_frame_resources[m_current_frame].fence_counter; } + + // Schedule a vulkan resource for destruction later on. This will occur when the command buffer + // is next re-used, and the GPU has finished working with the specified resource. + void DeferBufferDestruction(VkBuffer object, VmaAllocation allocation); + void DeferFramebufferDestruction(VkFramebuffer object); + void DeferImageDestruction(VkImage object, VmaAllocation allocation); + void DeferImageViewDestruction(VkImageView object); + void DeferPipelineDestruction(VkPipeline object); + void DeferBufferViewDestruction(VkBufferView object); + void DeferPersistentDescriptorSetDestruction(VkDescriptorSet object); + + // Wait for a fence to be completed. + // Also invokes callbacks for completion. + void WaitForFenceCounter(u64 fence_counter); + + /// Ends any render pass, executes the command buffer, and invalidates cached state. 
+ void SubmitCommandBuffer(bool wait_for_completion); + void SubmitCommandBuffer(bool wait_for_completion, const char* reason, ...); + void SubmitCommandBufferAndRestartRenderPass(const char* reason); + + void UnbindFramebuffer(VulkanFramebuffer* fb); + void UnbindFramebuffer(VulkanTexture* tex); + void UnbindPipeline(VulkanPipeline* pl); + void UnbindTexture(VulkanTexture* tex); + void UnbindTextureBuffer(VulkanTextureBuffer* buf); + +protected: + bool CreateDevice(const std::string_view& adapter, bool threaded_presentation) override; + void DestroyDevice() override; + + bool ReadPipelineCache(const std::string& filename) override; + bool GetPipelineCacheData(DynamicHeapArray* data) override; + +private: + enum DIRTY_FLAG : u32 + { + DIRTY_FLAG_INITIAL = (1 << 0), + DIRTY_FLAG_PIPELINE_LAYOUT = (1 << 1), + DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 2), + DIRTY_FLAG_TEXTURES_OR_SAMPLERS = (1 << 3), + + ALL_DIRTY_STATE = + DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS, + }; + + union RenderPassCacheKey + { + struct + { + u32 color_format : 8; + u32 depth_format : 8; + u32 samples : 4; + u32 color_load_op : 2; + u32 color_store_op : 1; + u32 depth_load_op : 2; + u32 depth_store_op : 1; + u32 stencil_load_op : 2; + u32 stencil_store_op : 1; + u32 color_feedback_loop : 1; + u32 depth_sampling : 1; + }; + + u32 key; + }; + + struct CommandBuffer + { + // [0] - Init (upload) command buffer, [1] - draw command buffer + VkCommandPool command_pool = VK_NULL_HANDLE; + std::array command_buffers{VK_NULL_HANDLE, VK_NULL_HANDLE}; + VkDescriptorPool descriptor_pool = VK_NULL_HANDLE; + VkFence fence = VK_NULL_HANDLE; + u64 fence_counter = 0; + bool init_buffer_used = false; + bool needs_fence_wait = false; + bool timestamp_written = false; + }; + + using CleanupObjectFunction = void (*)(VulkanDevice& dev, void* obj); + using SamplerMap = std::unordered_map; + + static void GetAdapterAndModeList(AdapterAndModeList* ret, 
VkInstance instance); + + // Helper method to create a Vulkan instance. + static VkInstance CreateVulkanInstance(const WindowInfo& wi, bool enable_debug_utils, bool enable_validation_layer); + + // Returns a list of Vulkan-compatible GPUs. + using GPUList = std::vector>; + static GPUList EnumerateGPUs(VkInstance instance); + + bool ValidatePipelineCacheHeader(const VK_PIPELINE_CACHE_HEADER& header); + void FillPipelineCacheHeader(VK_PIPELINE_CACHE_HEADER* header); + + // Enable/disable debug message runtime. + bool EnableDebugUtils(); + void DisableDebugUtils(); + + void SubmitCommandBuffer(VulkanSwapChain* present_swap_chain = nullptr, bool submit_on_thread = false); + void MoveToNextCommandBuffer(); + void WaitForPresentComplete(); + + // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. + bool CheckLastPresentFail(); + bool CheckLastSubmitFail(); + + using ExtensionList = std::vector; + static bool SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, bool enable_debug_utils); + bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface); + bool SelectDeviceFeatures(); + bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer); + void ProcessDeviceExtensions(); + + bool CheckFeatures(); + + bool CreateAllocator(); + void DestroyAllocator(); + bool CreateCommandBuffers(); + void DestroyCommandBuffers(); + bool CreatePersistentDescriptorPool(); + void DestroyPersistentDescriptorPool(); + bool CreateNullTexture(); + bool CreateBuffers(); + void DestroyBuffers(); + bool CreatePipelineLayouts(); + void DestroyPipelineLayouts(); + bool CreatePersistentDescriptorSets(); + void DestroyPersistentDescriptorSets(); + VkSampler GetSampler(const GPUSampler::Config& config); + void DestroySamplers(); + + void RenderBlankFrame(); + + bool CheckDownloadBufferSize(u32 required_size); + void DestroyDownloadBuffer(); + + /// Set dirty flags on everything to force re-bind at next 
draw time. + void InvalidateCachedState(); + + /// Applies any changed state. + VkPipelineLayout GetCurrentVkPipelineLayout() const; + void SetInitialPipelineState(); + void PreDrawCheck(); + + template + bool UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynamic_offsets); + bool UpdateDescriptorSets(u32 dirty); + + // Ends a render pass if we're currently in one. + // When Bind() is next called, the pass will be restarted. + void BeginRenderPass(); + void BeginSwapChainRenderPass(); + void EndRenderPass(); + bool InRenderPass(); + + VkRenderPass CreateCachedRenderPass(RenderPassCacheKey key); + + void BeginCommandBuffer(u32 index); + void WaitForCommandBufferCompletion(u32 index); + + void DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swap_chain); + void DoPresent(VulkanSwapChain* present_swap_chain); + void WaitForPresentComplete(std::unique_lock& lock); + void PresentThread(); + void StartPresentThread(); + void StopPresentThread(); + + VkInstance m_instance = VK_NULL_HANDLE; + VkPhysicalDevice m_physical_device = VK_NULL_HANDLE; + VkDevice m_device = VK_NULL_HANDLE; + VmaAllocator m_allocator = VK_NULL_HANDLE; + + VkCommandBuffer m_current_command_buffer = VK_NULL_HANDLE; + + VkDescriptorPool m_global_descriptor_pool = VK_NULL_HANDLE; + + VkQueue m_graphics_queue = VK_NULL_HANDLE; + VkQueue m_present_queue = VK_NULL_HANDLE; + u32 m_graphics_queue_family_index = 0; + u32 m_present_queue_family_index = 0; + + VkQueryPool m_timestamp_query_pool = VK_NULL_HANDLE; + float m_accumulated_gpu_time = 0.0f; + + std::array m_frame_resources; + std::deque>> m_cleanup_objects; // [fence_counter, callback] + u64 m_next_fence_counter = 1; + u64 m_completed_fence_counter = 0; + u32 m_current_frame = 0; + + std::atomic_bool m_last_submit_failed{false}; + std::atomic_bool m_last_present_failed{false}; + std::atomic_bool m_present_done{true}; + std::mutex m_present_mutex; + std::condition_variable m_present_queued_cv; + std::condition_variable 
m_present_done_cv; + std::thread m_present_thread; + std::atomic_bool m_present_thread_done{false}; + + struct QueuedPresent + { + VulkanSwapChain* swap_chain; + u32 command_buffer_index; + }; + + QueuedPresent m_queued_present = {}; + + std::unordered_map m_render_pass_cache; + VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; + + // TODO: Move to static? + VkDebugUtilsMessengerEXT m_debug_messenger_callback = VK_NULL_HANDLE; + + VkPhysicalDeviceFeatures m_device_features = {}; + VkPhysicalDeviceProperties m_device_properties = {}; + VkPhysicalDeviceDriverPropertiesKHR m_device_driver_properties = {}; + OptionalExtensions m_optional_extensions = {}; + + std::unique_ptr m_swap_chain; + std::unique_ptr m_null_texture; + + VkDescriptorSetLayout m_ubo_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_single_texture_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE; + VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE; + std::array(GPUPipeline::Layout::MaxCount)> m_pipeline_layouts = {}; + + VulkanStreamBuffer m_vertex_buffer; + VulkanStreamBuffer m_index_buffer; + VulkanStreamBuffer m_uniform_buffer; + VulkanStreamBuffer m_texture_upload_buffer; + + VkDescriptorSet m_ubo_descriptor_set = VK_NULL_HANDLE; + u32 m_uniform_buffer_position = 0; + + SamplerMap m_sampler_map; + + VmaAllocation m_download_buffer_allocation = VK_NULL_HANDLE; + VkBuffer m_download_buffer = VK_NULL_HANDLE; + u8* m_download_buffer_map = nullptr; + u32 m_download_buffer_size = 0; + + // Which bindings/state has to be updated before the next draw. 
+ u32 m_dirty_flags = ALL_DIRTY_STATE; + + VulkanFramebuffer* m_current_framebuffer = nullptr; + VkRenderPass m_current_render_pass = VK_NULL_HANDLE; + + VulkanPipeline* m_current_pipeline = nullptr; + GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + + std::array m_current_textures = {}; + std::array m_current_samplers = {}; + VulkanTextureBuffer* m_current_texture_buffer = nullptr; + Common::Rectangle m_current_viewport{0, 0, 1, 1}; + Common::Rectangle m_current_scissor{0, 0, 1, 1}; +}; diff --git a/src/util/vulkan_entry_points.h b/src/util/vulkan_entry_points.h new file mode 100644 index 000000000..69cf5ca4c --- /dev/null +++ b/src/util/vulkan_entry_points.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name name; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name name; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name name; +#include "vulkan_entry_points.inl" +#undef VULKAN_DEVICE_ENTRY_POINT +#undef VULKAN_INSTANCE_ENTRY_POINT +#undef VULKAN_MODULE_ENTRY_POINT + +#ifdef __cplusplus +} +#endif diff --git a/src/common/vulkan/entry_points.inl b/src/util/vulkan_entry_points.inl similarity index 97% rename from src/common/vulkan/entry_points.inl rename to src/util/vulkan_entry_points.inl index dd8df3a9c..a586e30f4 100644 --- a/src/common/vulkan/entry_points.inl +++ b/src/util/vulkan_entry_points.inl @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) // Expands the VULKAN_ENTRY_POINT macro for each function when this file is included. 
@@ -52,14 +52,6 @@ VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceXlibPresentationSupportKHR, false VULKAN_INSTANCE_ENTRY_POINT(vkCreateWaylandSurfaceKHR, false) #endif -#if defined(VK_USE_PLATFORM_ANDROID_KHR) -VULKAN_INSTANCE_ENTRY_POINT(vkCreateAndroidSurfaceKHR, false) -#endif - -#if defined(VK_USE_PLATFORM_MACOS_MVK) -VULKAN_INSTANCE_ENTRY_POINT(vkCreateMacOSSurfaceMVK, false) -#endif - #if defined(VK_USE_PLATFORM_METAL_EXT) VULKAN_INSTANCE_ENTRY_POINT(vkCreateMetalSurfaceEXT, false) #endif @@ -92,6 +84,9 @@ VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceFeatures2, true) VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceProperties2, true) VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceMemoryProperties2, true) +// VK_EXT_calibrated_timestamps +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, false) + #endif // VULKAN_INSTANCE_ENTRY_POINT #ifdef VULKAN_DEVICE_ENTRY_POINT @@ -228,13 +223,16 @@ VULKAN_DEVICE_ENTRY_POINT(vkGetImageMemoryRequirements2, true) VULKAN_DEVICE_ENTRY_POINT(vkBindBufferMemory2, true) VULKAN_DEVICE_ENTRY_POINT(vkBindImageMemory2, true) -#ifdef SUPPORTS_VULKAN_EXCLUSIVE_FULLSCREEN -VULKAN_DEVICE_ENTRY_POINT(vkAcquireFullScreenExclusiveModeEXT, false) -VULKAN_DEVICE_ENTRY_POINT(vkReleaseFullScreenExclusiveModeEXT, false) -#endif - // Vulkan 1.3 functions. 
VULKAN_DEVICE_ENTRY_POINT(vkGetDeviceBufferMemoryRequirements, false) VULKAN_DEVICE_ENTRY_POINT(vkGetDeviceImageMemoryRequirements, false) +#ifdef _WIN32 +VULKAN_DEVICE_ENTRY_POINT(vkAcquireFullScreenExclusiveModeEXT, false) +VULKAN_DEVICE_ENTRY_POINT(vkReleaseFullScreenExclusiveModeEXT, false) +#endif + +// VK_KHR_push_descriptor +VULKAN_DEVICE_ENTRY_POINT(vkCmdPushDescriptorSetKHR, false) + #endif // VULKAN_DEVICE_ENTRY_POINT diff --git a/src/util/vulkan_host_display.cpp b/src/util/vulkan_host_display.cpp deleted file mode 100644 index b1fdea74e..000000000 --- a/src/util/vulkan_host_display.cpp +++ /dev/null @@ -1,1236 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#include "vulkan_host_display.h" -#include "common/align.h" -#include "common/assert.h" -#include "common/log.h" -#include "common/scoped_guard.h" -#include "common/string_util.h" -#include "common/vulkan/builders.h" -#include "common/vulkan/context.h" -#include "common/vulkan/shader_cache.h" -#include "common/vulkan/stream_buffer.h" -#include "common/vulkan/swap_chain.h" -#include "common/vulkan/util.h" -#include "core/common_host.h" -#include "core/shader_cache_version.h" -#include "imgui.h" -#include "imgui_impl_vulkan.h" -#include "postprocessing_shadergen.h" -#include -Log_SetChannel(VulkanHostDisplay); - -VulkanHostDisplay::VulkanHostDisplay() = default; - -VulkanHostDisplay::~VulkanHostDisplay() -{ - if (!g_vulkan_context) - return; - - g_vulkan_context->WaitForGPUIdle(); - - DestroyStagingBuffer(); - DestroyResources(); - - Vulkan::ShaderCache::Destroy(); - m_swap_chain.reset(); - Vulkan::Context::Destroy(); - - AssertMsg(!g_vulkan_context, "Context should have been destroyed by now"); - AssertMsg(!m_swap_chain, "Swap chain should have been destroyed by now"); -} - -RenderAPI VulkanHostDisplay::GetRenderAPI() const -{ - return RenderAPI::Vulkan; -} - -void* VulkanHostDisplay::GetDevice() const -{ - return nullptr; -} 
- -void* VulkanHostDisplay::GetContext() const -{ - return nullptr; -} - -bool VulkanHostDisplay::ChangeWindow(const WindowInfo& new_wi) -{ - g_vulkan_context->WaitForGPUIdle(); - - if (new_wi.type == WindowInfo::Type::Surfaceless) - { - g_vulkan_context->ExecuteCommandBuffer(true); - m_swap_chain.reset(); - m_window_info = new_wi; - return true; - } - - // recreate surface in existing swap chain if it already exists - if (m_swap_chain) - { - if (m_swap_chain->RecreateSurface(new_wi)) - { - m_window_info = m_swap_chain->GetWindowInfo(); - return true; - } - - m_swap_chain.reset(); - } - - WindowInfo wi_copy(new_wi); - VkSurfaceKHR surface = Vulkan::SwapChain::CreateVulkanSurface(g_vulkan_context->GetVulkanInstance(), - g_vulkan_context->GetPhysicalDevice(), &wi_copy); - if (surface == VK_NULL_HANDLE) - { - Log_ErrorPrintf("Failed to create new surface for swap chain"); - return false; - } - - m_swap_chain = Vulkan::SwapChain::Create(wi_copy, surface, false); - if (!m_swap_chain) - { - Log_ErrorPrintf("Failed to create swap chain"); - Vulkan::SwapChain::DestroyVulkanSurface(g_vulkan_context->GetVulkanInstance(), &wi_copy, surface); - return false; - } - - m_window_info = m_swap_chain->GetWindowInfo(); - m_vsync_enabled = m_swap_chain->IsVSyncEnabled(); - return true; -} - -void VulkanHostDisplay::ResizeWindow(s32 new_window_width, s32 new_window_height) -{ - g_vulkan_context->WaitForGPUIdle(); - - if (!m_swap_chain->ResizeSwapChain(new_window_width, new_window_height)) - Panic("Failed to resize swap chain"); - - m_window_info = m_swap_chain->GetWindowInfo(); - m_vsync_enabled = m_swap_chain->IsVSyncEnabled(); -} - -bool VulkanHostDisplay::SupportsFullscreen() const -{ - return false; -} - -bool VulkanHostDisplay::IsFullscreen() -{ - return false; -} - -bool VulkanHostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) -{ - return false; -} - -HostDisplay::AdapterAndModeList VulkanHostDisplay::GetAdapterAndModeList() -{ - return 
StaticGetAdapterAndModeList(m_window_info.type != WindowInfo::Type::Surfaceless ? &m_window_info : nullptr); -} - -void VulkanHostDisplay::DestroySurface() -{ - m_window_info.SetSurfaceless(); - g_vulkan_context->WaitForGPUIdle(); - m_swap_chain.reset(); -} - -std::unique_ptr VulkanHostDisplay::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, - u32 data_stride, bool dynamic /* = false */) -{ - const VkFormat vk_format = Vulkan::Texture::GetVkFormat(format); - if (vk_format == VK_FORMAT_UNDEFINED) - return {}; - - static constexpr VkImageUsageFlags usage = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - - std::unique_ptr texture(std::make_unique()); - if (!texture->Create(width, height, levels, layers, vk_format, static_cast(samples), - (layers > 1) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - usage)) - { - return {}; - } - - texture->TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - if (data) - { - texture->Update(0, 0, width, height, 0, 0, data, data_stride); - } - else - { - // clear it instead so we don't read uninitialized data (and keep the validation layer happy!) 
- static constexpr VkClearColorValue ccv = {}; - static constexpr VkImageSubresourceRange isr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; - vkCmdClearColorImage(g_vulkan_context->GetCurrentCommandBuffer(), texture->GetImage(), texture->GetLayout(), &ccv, - 1u, &isr); - } - - texture->TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - return texture; -} - -bool VulkanHostDisplay::BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, - u32* out_pitch) -{ - return static_cast(texture)->BeginUpdate(width, height, out_buffer, out_pitch); -} - -void VulkanHostDisplay::EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) -{ - static_cast(texture)->EndUpdate(x, y, width, height, 0, 0); -} - -bool VulkanHostDisplay::UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, - u32 pitch) -{ - return static_cast(texture)->Update(x, y, width, height, 0, 0, data, pitch); -} - -bool VulkanHostDisplay::SupportsTextureFormat(GPUTexture::Format format) const -{ - const VkFormat vk_format = Vulkan::Texture::GetVkFormat(format); - if (vk_format == VK_FORMAT_UNDEFINED) - return false; - - VkFormatProperties fp = {}; - vkGetPhysicalDeviceFormatProperties(g_vulkan_context->GetPhysicalDevice(), vk_format, &fp); - - const VkFormatFeatureFlags required = (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT); - return ((fp.optimalTilingFeatures & required) == required); -} - -void VulkanHostDisplay::SetVSync(bool enabled) -{ - if (!m_swap_chain || m_swap_chain->IsVSyncEnabled() == enabled) - return; - - // This swap chain should not be used by the current buffer, thus safe to destroy. 
- g_vulkan_context->WaitForGPUIdle(); - m_swap_chain->SetVSync(enabled); - m_vsync_enabled = m_swap_chain->IsVSyncEnabled(); -} - -bool VulkanHostDisplay::CreateDevice(const WindowInfo& wi, bool vsync) -{ - WindowInfo local_wi(wi); - bool result = - Vulkan::Context::Create(g_settings.gpu_adapter, &local_wi, &m_swap_chain, g_settings.gpu_threaded_presentation, - g_settings.gpu_use_debug_device, g_settings.gpu_use_debug_device, vsync); - - // If validation layers were enabled, try without. - if (!result && g_settings.gpu_use_debug_device) - { - Log_WarningPrintf("Failed to create Vulkan context with validation layers, trying without."); - result = Vulkan::Context::Create(g_settings.gpu_adapter, &local_wi, &m_swap_chain, - g_settings.gpu_threaded_presentation, false, false, vsync); - } - - if (!result) - { - Log_ErrorPrintf("Failed to create Vulkan context"); - m_window_info = {}; - return false; - } - - Vulkan::ShaderCache::Create(EmuFolders::Cache, SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device); - - m_is_adreno = (g_vulkan_context->GetDeviceProperties().vendorID == 0x5143 || - g_vulkan_context->GetDeviceDriverProperties().driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY); - - m_window_info = m_swap_chain ? m_swap_chain->GetWindowInfo() : local_wi; - m_vsync_enabled = m_swap_chain ? m_swap_chain->IsVSyncEnabled() : false; - return true; -} - -bool VulkanHostDisplay::SetupDevice() -{ - if (!CreateResources()) - return false; - - return true; -} - -bool VulkanHostDisplay::HasDevice() const -{ - return static_cast(g_vulkan_context); -} - -bool VulkanHostDisplay::HasSurface() const -{ - return static_cast(m_swap_chain); -} - -VkRenderPass VulkanHostDisplay::GetRenderPassForDisplay() const -{ - if (m_swap_chain) - { - return m_swap_chain->GetClearRenderPass(); - } - else - { - // If we're running headless, assume RGBA8. 
- return g_vulkan_context->GetRenderPass(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, - VK_ATTACHMENT_LOAD_OP_CLEAR); - } -} - -void VulkanHostDisplay::DestroyStagingBuffer() -{ - if (m_readback_staging_buffer == VK_NULL_HANDLE) - return; - - vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_readback_staging_buffer, m_readback_staging_allocation); - - // unmapped as part of the buffer destroy - m_readback_staging_buffer = VK_NULL_HANDLE; - m_readback_staging_allocation = VK_NULL_HANDLE; - m_readback_staging_buffer_map = nullptr; - m_readback_staging_buffer_size = 0; -} - -bool VulkanHostDisplay::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - Vulkan::Texture* tex = static_cast(texture); - - const u32 pitch = tex->CalcUpdatePitch(width); - const u32 size = pitch * height; - const u32 level = 0; - if (!CheckStagingBufferSize(size)) - { - Log_ErrorPrintf("Can't read back %ux%u", width, height); - return false; - } - - { - const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "VulkanHostDisplay::DownloadTexture(%u,%u)", width, height); - - VkImageLayout old_layout = tex->GetLayout(); - if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) - tex->TransitionSubresourcesToLayout(cmdbuf, level, 1, 0, 1, old_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - VkBufferImageCopy image_copy = {}; - const VkImageAspectFlags aspect = Vulkan::Util::IsDepthFormat(static_cast(tex->GetFormat())) ? - VK_IMAGE_ASPECT_DEPTH_BIT : - VK_IMAGE_ASPECT_COLOR_BIT; - image_copy.bufferOffset = 0; - image_copy.bufferRowLength = tex->CalcUpdateRowLength(pitch); - image_copy.bufferImageHeight = 0; - image_copy.imageSubresource = {aspect, level, 0u, 1u}; - image_copy.imageOffset = {static_cast(x), static_cast(y), 0}; - image_copy.imageExtent = {width, height, 1u}; - - // invalidate gpu cache - // TODO: Needed? 
- Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_readback_staging_buffer, 0, VK_ACCESS_TRANSFER_WRITE_BIT, 0, size, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - - // do the copy - vkCmdCopyImageToBuffer(cmdbuf, tex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_readback_staging_buffer, 1, - &image_copy); - - // flush gpu cache - Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_readback_staging_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_HOST_READ_BIT, 0, size, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_PIPELINE_STAGE_HOST_BIT); - - if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) - { - tex->TransitionSubresourcesToLayout(cmdbuf, level, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, old_layout); - } - } - - g_vulkan_context->ExecuteCommandBuffer(true); - - // invalidate cpu cache before reading - VkResult res = vmaInvalidateAllocation(g_vulkan_context->GetAllocator(), m_readback_staging_allocation, 0, size); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: "); - - StringUtil::StrideMemCpy(out_data, out_data_stride, m_readback_staging_buffer_map, pitch, - std::min(pitch, out_data_stride), height); - - return true; -} - -bool VulkanHostDisplay::CheckStagingBufferSize(u32 required_size) -{ - if (m_readback_staging_buffer_size >= required_size) - return true; - - DestroyStagingBuffer(); - - const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - nullptr, - 0u, - required_size, - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_SHARING_MODE_EXCLUSIVE, - 0u, - nullptr}; - - VmaAllocationCreateInfo aci = {}; - aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; - aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - aci.preferredFlags = m_is_adreno ? 
(VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - - VmaAllocationInfo ai = {}; - VkResult res = vmaCreateBuffer(g_vulkan_context->GetAllocator(), &bci, &aci, &m_readback_staging_buffer, - &m_readback_staging_allocation, &ai); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); - return false; - } - - m_readback_staging_buffer_map = static_cast(ai.pMappedData); - return true; -} - -bool VulkanHostDisplay::CreateResources() -{ - static constexpr char fullscreen_quad_vertex_shader[] = R"( -#version 450 core - -layout(push_constant) uniform PushConstants { - uniform vec4 u_src_rect; -}; - -layout(location = 0) out vec2 v_tex0; - -void main() -{ - vec2 pos = vec2(float((gl_VertexIndex << 1) & 2), float(gl_VertexIndex & 2)); - v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; - gl_Position = vec4(pos * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); - gl_Position.y = -gl_Position.y; -} -)"; - - static constexpr char display_fragment_shader_src[] = R"( -#version 450 core - -layout(set = 0, binding = 0) uniform sampler2D samp0; - -layout(location = 0) in vec2 v_tex0; -layout(location = 0) out vec4 o_col0; - -void main() -{ - o_col0 = vec4(texture(samp0, v_tex0).rgb, 1.0); -} -)"; - - static constexpr char cursor_fragment_shader_src[] = R"( -#version 450 core - -layout(set = 0, binding = 0) uniform sampler2D samp0; - -layout(location = 0) in vec2 v_tex0; -layout(location = 0) out vec4 o_col0; - -void main() -{ - o_col0 = texture(samp0, v_tex0); -} -)"; - - VkDevice device = g_vulkan_context->GetDevice(); - VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); - - Vulkan::DescriptorSetLayoutBuilder dslbuilder; - dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_descriptor_set_layout = dslbuilder.Create(device); - if (m_descriptor_set_layout == VK_NULL_HANDLE) - return false; - - 
Vulkan::PipelineLayoutBuilder plbuilder; - plbuilder.AddDescriptorSet(m_descriptor_set_layout); - plbuilder.AddPushConstants(VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(PushConstants)); - m_pipeline_layout = plbuilder.Create(device); - if (m_pipeline_layout == VK_NULL_HANDLE) - return false; - - dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_post_process_descriptor_set_layout = dslbuilder.Create(device); - if (m_post_process_descriptor_set_layout == VK_NULL_HANDLE) - return false; - - plbuilder.AddDescriptorSet(m_post_process_descriptor_set_layout); - plbuilder.AddPushConstants(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, - FrontendCommon::PostProcessingShader::PUSH_CONSTANT_SIZE_THRESHOLD); - m_post_process_pipeline_layout = plbuilder.Create(device); - if (m_post_process_pipeline_layout == VK_NULL_HANDLE) - return false; - - dslbuilder.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT); - dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); - m_post_process_ubo_descriptor_set_layout = dslbuilder.Create(device); - if (m_post_process_ubo_descriptor_set_layout == VK_NULL_HANDLE) - return false; - - plbuilder.AddDescriptorSet(m_post_process_ubo_descriptor_set_layout); - m_post_process_ubo_pipeline_layout = plbuilder.Create(device); - if (m_post_process_ubo_pipeline_layout == VK_NULL_HANDLE) - return false; - - VkShaderModule vertex_shader = g_vulkan_shader_cache->GetVertexShader(fullscreen_quad_vertex_shader); - if (vertex_shader == VK_NULL_HANDLE) - return false; - - VkShaderModule display_fragment_shader = g_vulkan_shader_cache->GetFragmentShader(display_fragment_shader_src); - VkShaderModule cursor_fragment_shader = g_vulkan_shader_cache->GetFragmentShader(cursor_fragment_shader_src); - if (display_fragment_shader == VK_NULL_HANDLE || cursor_fragment_shader == VK_NULL_HANDLE) - 
return false; - - Vulkan::GraphicsPipelineBuilder gpbuilder; - gpbuilder.SetVertexShader(vertex_shader); - gpbuilder.SetFragmentShader(display_fragment_shader); - gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - gpbuilder.SetPipelineLayout(m_pipeline_layout); - gpbuilder.SetRenderPass(GetRenderPassForDisplay(), 0); - - m_display_pipeline = gpbuilder.Create(device, pipeline_cache, false); - if (m_display_pipeline == VK_NULL_HANDLE) - return false; - - gpbuilder.SetFragmentShader(cursor_fragment_shader); - gpbuilder.SetBlendAttachment(0, true, VK_BLEND_FACTOR_SRC_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_OP_ADD, - VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD); - m_cursor_pipeline = gpbuilder.Create(device, pipeline_cache, false); - if (m_cursor_pipeline == VK_NULL_HANDLE) - return false; - - // don't need these anymore - vkDestroyShaderModule(device, vertex_shader, nullptr); - vkDestroyShaderModule(device, display_fragment_shader, nullptr); - vkDestroyShaderModule(device, cursor_fragment_shader, nullptr); - - Vulkan::SamplerBuilder sbuilder; - sbuilder.SetPointSampler(VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - m_point_sampler = sbuilder.Create(device, true); - if (m_point_sampler == VK_NULL_HANDLE) - return false; - - sbuilder.SetLinearSampler(false, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); - m_linear_sampler = sbuilder.Create(device); - if (m_linear_sampler == VK_NULL_HANDLE) - return false; - - sbuilder.SetPointSampler(VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); - sbuilder.SetBorderColor(VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); - m_border_sampler = sbuilder.Create(device); - if (m_border_sampler == VK_NULL_HANDLE) - return false; - - return true; -} - -void VulkanHostDisplay::DestroyResources() -{ - HostDisplay::DestroyResources(); - - 
Vulkan::Util::SafeDestroyPipelineLayout(m_post_process_pipeline_layout); - Vulkan::Util::SafeDestroyPipelineLayout(m_post_process_ubo_pipeline_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_post_process_descriptor_set_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_post_process_ubo_descriptor_set_layout); - m_post_processing_input_texture.Destroy(false); - Vulkan::Util::SafeDestroyFramebuffer(m_post_processing_input_framebuffer); - m_post_processing_stages.clear(); - m_post_processing_ubo.Destroy(true); - m_post_processing_chain.ClearStages(); - - Vulkan::Util::SafeDestroyPipeline(m_display_pipeline); - Vulkan::Util::SafeDestroyPipeline(m_cursor_pipeline); - Vulkan::Util::SafeDestroyPipelineLayout(m_pipeline_layout); - Vulkan::Util::SafeDestroyDescriptorSetLayout(m_descriptor_set_layout); - Vulkan::Util::SafeDestroySampler(m_border_sampler); - Vulkan::Util::SafeDestroySampler(m_point_sampler); - Vulkan::Util::SafeDestroySampler(m_linear_sampler); -} - -bool VulkanHostDisplay::CreateImGuiContext() -{ - const VkRenderPass render_pass = - m_swap_chain ? m_swap_chain->GetClearRenderPass() : - g_vulkan_context->GetRenderPass(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, - VK_ATTACHMENT_LOAD_OP_CLEAR); - if (render_pass == VK_NULL_HANDLE) - return false; - - return ImGui_ImplVulkan_Init(render_pass); -} - -void VulkanHostDisplay::DestroyImGuiContext() -{ - g_vulkan_context->WaitForGPUIdle(); - ImGui_ImplVulkan_Shutdown(); -} - -bool VulkanHostDisplay::UpdateImGuiFontTexture() -{ - // Just in case we were drawing something. 
- g_vulkan_context->ExecuteCommandBuffer(true); - return ImGui_ImplVulkan_CreateFontsTexture(); -} - -bool VulkanHostDisplay::MakeCurrent() -{ - return true; -} - -bool VulkanHostDisplay::DoneCurrent() -{ - return true; -} - -bool VulkanHostDisplay::Render(bool skip_present) -{ - if (skip_present || !m_swap_chain) - { - if (ImGui::GetCurrentContext()) - ImGui::Render(); - - return false; - } - - // Previous frame needs to be presented before we can acquire the swap chain. - g_vulkan_context->WaitForPresentComplete(); - - VkResult res = m_swap_chain->AcquireNextImage(); - if (res != VK_SUCCESS) - { - if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) - { - ResizeWindow(0, 0); - res = m_swap_chain->AcquireNextImage(); - } - else if (res == VK_ERROR_SURFACE_LOST_KHR) - { - Log_WarningPrint("Surface lost, attempting to recreate"); - if (!m_swap_chain->RecreateSurface(m_window_info)) - { - Log_ErrorPrint("Failed to recreate surface after loss"); - g_vulkan_context->ExecuteCommandBuffer(false); - m_swap_chain.reset(); - return false; - } - - res = m_swap_chain->AcquireNextImage(); - } - - // This can happen when multiple resize events happen in quick succession. - // In this case, just wait until the next frame to try again. - if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) - { - // Still submit the command buffer, otherwise we'll end up with several frames waiting. 
- LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR() failed: "); - g_vulkan_context->ExecuteCommandBuffer(false); - return false; - } - } - - VkCommandBuffer cmdbuffer = g_vulkan_context->GetCurrentCommandBuffer(); - Vulkan::Texture& swap_chain_texture = m_swap_chain->GetCurrentTexture(); - - { - const Vulkan::Util::DebugScope debugScope(cmdbuffer, "VulkanHostDisplay::Render"); - // Swap chain images start in undefined - swap_chain_texture.OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); - swap_chain_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - RenderDisplay(); - - if (ImGui::GetCurrentContext()) - RenderImGui(); - - RenderSoftwareCursor(); - - vkCmdEndRenderPass(cmdbuffer); - Vulkan::Util::EndDebugScope(cmdbuffer); - - swap_chain_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); - } - - g_vulkan_context->SubmitCommandBuffer(m_swap_chain->GetImageAvailableSemaphore(), - m_swap_chain->GetRenderingFinishedSemaphore(), m_swap_chain->GetSwapChain(), - m_swap_chain->GetCurrentImageIndex(), !m_swap_chain->IsVSyncEnabled()); - g_vulkan_context->MoveToNextCommandBuffer(); - - return true; -} - -bool VulkanHostDisplay::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - // in theory we could do this without a swap chain, but postprocessing assumes it for now... - if (!m_swap_chain) - return false; - - const VkFormat format = m_swap_chain ? 
m_swap_chain->GetTextureFormat() : VK_FORMAT_R8G8B8A8_UNORM; - switch (format) - { - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - *out_format = GPUTexture::Format::RGBA8; - *out_stride = sizeof(u32) * width; - out_pixels->resize(width * height); - break; - - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - *out_format = GPUTexture::Format::BGRA8; - *out_stride = sizeof(u32) * width; - out_pixels->resize(width * height); - break; - - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - *out_format = GPUTexture::Format::RGBA5551; - *out_stride = sizeof(u16) * width; - out_pixels->resize(((width * height) + 1) / 2); - break; - - case VK_FORMAT_R5G6B5_UNORM_PACK16: - *out_format = GPUTexture::Format::RGB565; - *out_stride = sizeof(u16) * width; - out_pixels->resize(((width * height) + 1) / 2); - break; - - default: - Log_ErrorPrintf("Unhandled swap chain pixel format %u", static_cast(format)); - break; - } - - // if we don't have a texture (display off), then just write out nothing. - if (!HasDisplayTexture()) - { - std::fill(out_pixels->begin(), out_pixels->end(), static_cast(0)); - return true; - } - - Vulkan::Texture tex; - if (!tex.Create(width, height, 1, 1, format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) - { - return false; - } - - const VkRenderPass rp = - m_swap_chain ? 
- m_swap_chain->GetClearRenderPass() : - g_vulkan_context->GetRenderPass(format, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); - if (!rp) - return false; - - const VkFramebuffer fb = tex.CreateFramebuffer(rp); - if (!fb) - return false; - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "VulkanHostDisplay::RenderScreenshot: %ux%u", width, height); - tex.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - if (!m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(fb, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - width, height); - } - else - { - BeginSwapChainRenderPass(fb, width, height); - RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); - } - - vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer()); - Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer()); - tex.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - DownloadTexture(&tex, 0, 0, width, height, out_pixels->data(), *out_stride); - - // destroying these immediately should be safe since nothing's going to access them, and it's not part of the command - // stream - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), fb, nullptr); - tex.Destroy(false); - return true; -} - -void VulkanHostDisplay::BeginSwapChainRenderPass(VkFramebuffer framebuffer, u32 width, u32 height) -{ - const VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; - const VkRenderPassBeginInfo rp = 
{VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - m_swap_chain->GetClearRenderPass(), - framebuffer, - {{0, 0}, {width, height}}, - 1u, - &clear_value}; - Vulkan::Util::BeginDebugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "VulkanHostDisplay::BeginSwapChainRenderPass"); - vkCmdBeginRenderPass(g_vulkan_context->GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); -} - -void VulkanHostDisplay::RenderDisplay() -{ - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "VulkanHostDisplay::RenderDisplay"); - if (!HasDisplayTexture()) - { - BeginSwapChainRenderPass(m_swap_chain->GetCurrentFramebuffer(), m_swap_chain->GetWidth(), - m_swap_chain->GetHeight()); - return; - } - - const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); - - if (!m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(m_swap_chain->GetCurrentFramebuffer(), left, top, width, height, - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - m_swap_chain->GetWidth(), m_swap_chain->GetHeight()); - return; - } - - BeginSwapChainRenderPass(m_swap_chain->GetCurrentFramebuffer(), m_swap_chain->GetWidth(), m_swap_chain->GetHeight()); - RenderDisplay(left, top, width, height, static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - IsUsingLinearFiltering()); -} - -void VulkanHostDisplay::RenderDisplay(s32 left, s32 top, s32 width, s32 height, Vulkan::Texture* texture, - s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, bool linear_filter) -{ - VkCommandBuffer cmdbuffer = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope( - cmdbuffer, "VulkanHostDisplay::RenderDisplay: {%u,%u} %ux%u | %ux%u | {%u,%u} %ux%u", left, top, width, height, - 
texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, texture_view_height); - - VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet(m_descriptor_set_layout); - if (ds == VK_NULL_HANDLE) - { - Log_ErrorPrintf("Skipping rendering display because of no descriptor set"); - return; - } - - { - Vulkan::DescriptorSetUpdateBuilder dsupdate; - dsupdate.AddCombinedImageSamplerDescriptorWrite( - ds, 0, texture->GetView(), linear_filter ? m_linear_sampler : m_point_sampler, texture->GetLayout()); - dsupdate.Update(g_vulkan_context->GetDevice()); - } - - const float position_adjust = IsUsingLinearFiltering() ? 0.5f : 0.0f; - const float size_adjust = IsUsingLinearFiltering() ? 1.0f : 0.0f; - const PushConstants pc{ - (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_y) + position_adjust) / static_cast(texture->GetHeight()), - (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_height) - size_adjust) / static_cast(texture->GetHeight())}; - - vkCmdBindPipeline(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_display_pipeline); - vkCmdPushConstants(cmdbuffer, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc); - vkCmdBindDescriptorSets(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, &ds, 0, nullptr); - Vulkan::Util::SetViewportAndClampScissor(cmdbuffer, left, top, width, height); - vkCmdDraw(cmdbuffer, 3, 1, 0, 0); -} - -void VulkanHostDisplay::RenderImGui() -{ - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), "Imgui"); - ImGui::Render(); - ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData()); -} - -void VulkanHostDisplay::RenderSoftwareCursor() -{ - if (!HasSoftwareCursor()) - return; - - const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); - RenderSoftwareCursor(left, top, width, height, m_cursor_texture.get()); -} 
- -void VulkanHostDisplay::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture) -{ - VkCommandBuffer cmdbuffer = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuffer, "VulkanHostDisplay::RenderSoftwareCursor: {%u,%u} %ux%u", left, - top, width, height); - - VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet(m_descriptor_set_layout); - if (ds == VK_NULL_HANDLE) - { - Log_ErrorPrintf("Skipping rendering software cursor because of no descriptor set"); - return; - } - - { - Vulkan::DescriptorSetUpdateBuilder dsupdate; - dsupdate.AddCombinedImageSamplerDescriptorWrite(ds, 0, static_cast(texture)->GetView(), - m_linear_sampler); - dsupdate.Update(g_vulkan_context->GetDevice()); - } - - const PushConstants pc{0.0f, 0.0f, 1.0f, 1.0f}; - vkCmdBindPipeline(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_cursor_pipeline); - vkCmdPushConstants(cmdbuffer, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pc), &pc); - vkCmdBindDescriptorSets(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout, 0, 1, &ds, 0, nullptr); - Vulkan::Util::SetViewportAndClampScissor(cmdbuffer, left, top, width, height); - vkCmdDraw(cmdbuffer, 3, 1, 0, 0); -} - -bool VulkanHostDisplay::SetGPUTimingEnabled(bool enabled) -{ - if (g_vulkan_context->SetEnableGPUTiming(enabled)) - { - m_gpu_timing_enabled = enabled; - return true; - } - - return false; -} - -float VulkanHostDisplay::GetAndResetAccumulatedGPUTime() -{ - return g_vulkan_context->GetAndResetAccumulatedGPUTime(); -} - -HostDisplay::AdapterAndModeList VulkanHostDisplay::StaticGetAdapterAndModeList(const WindowInfo* wi) -{ - AdapterAndModeList ret; - std::vector fsmodes; - - if (g_vulkan_context) - { - ret.adapter_names = Vulkan::Context::EnumerateGPUNames(g_vulkan_context->GetVulkanInstance()); - if (wi) - { - fsmodes = Vulkan::SwapChain::GetSurfaceFullscreenModes(g_vulkan_context->GetVulkanInstance(), - g_vulkan_context->GetPhysicalDevice(), 
*wi); - } - } - else if (Vulkan::LoadVulkanLibrary()) - { - ScopedGuard lib_guard([]() { Vulkan::UnloadVulkanLibrary(); }); - - VkInstance instance = Vulkan::Context::CreateVulkanInstance(nullptr, false, false); - if (instance != VK_NULL_HANDLE) - { - ScopedGuard instance_guard([&instance]() { vkDestroyInstance(instance, nullptr); }); - - if (Vulkan::LoadVulkanInstanceFunctions(instance)) - ret.adapter_names = Vulkan::Context::EnumerateGPUNames(instance); - } - } - - if (!fsmodes.empty()) - { - ret.fullscreen_modes.reserve(fsmodes.size()); - for (const Vulkan::SwapChain::FullscreenModeInfo& fmi : fsmodes) - { - ret.fullscreen_modes.push_back(GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); - } - } - - return ret; -} - -VulkanHostDisplay::PostProcessingStage::PostProcessingStage(PostProcessingStage&& move) - : pipeline(move.pipeline), output_framebuffer(move.output_framebuffer), - output_texture(std::move(move.output_texture)), uniforms_size(move.uniforms_size) -{ - move.output_framebuffer = VK_NULL_HANDLE; - move.pipeline = VK_NULL_HANDLE; - move.uniforms_size = 0; -} - -VulkanHostDisplay::PostProcessingStage::~PostProcessingStage() -{ - if (output_framebuffer != VK_NULL_HANDLE) - g_vulkan_context->DeferFramebufferDestruction(output_framebuffer); - - output_texture.Destroy(true); - if (pipeline != VK_NULL_HANDLE) - g_vulkan_context->DeferPipelineDestruction(pipeline); -} - -bool VulkanHostDisplay::SetPostProcessingChain(const std::string_view& config) -{ - g_vulkan_context->ExecuteCommandBuffer(true); - - if (config.empty()) - { - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return true; - } - - if (!m_post_processing_chain.CreateFromString(config)) - return false; - - m_post_processing_stages.clear(); - - FrontendCommon::PostProcessingShaderGen shadergen(RenderAPI::Vulkan, false); - bool only_use_push_constants = true; - - for (u32 i = 0; i < m_post_processing_chain.GetStageCount(); i++) - { - const 
FrontendCommon::PostProcessingShader& shader = m_post_processing_chain.GetShaderStage(i); - const std::string vs = shadergen.GeneratePostProcessingVertexShader(shader); - const std::string ps = shadergen.GeneratePostProcessingFragmentShader(shader); - const bool use_push_constants = shader.UsePushConstants(); - only_use_push_constants &= use_push_constants; - - PostProcessingStage stage; - stage.uniforms_size = shader.GetUniformsSize(); - - VkShaderModule vs_mod = g_vulkan_shader_cache->GetVertexShader(vs); - VkShaderModule fs_mod = g_vulkan_shader_cache->GetFragmentShader(ps); - if (vs_mod == VK_NULL_HANDLE || fs_mod == VK_NULL_HANDLE) - { - Log_ErrorPrintf("Failed to compile one or more post-processing shaders, disabling."); - - if (vs_mod != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), vs_mod, nullptr); - if (fs_mod != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), vs_mod, nullptr); - - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - Vulkan::GraphicsPipelineBuilder gpbuilder; - gpbuilder.SetVertexShader(vs_mod); - gpbuilder.SetFragmentShader(fs_mod); - gpbuilder.SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - gpbuilder.SetPipelineLayout(use_push_constants ? 
m_post_process_pipeline_layout : - m_post_process_ubo_pipeline_layout); - gpbuilder.SetRenderPass(GetRenderPassForDisplay(), 0); - - stage.pipeline = gpbuilder.Create(g_vulkan_context->GetDevice(), g_vulkan_shader_cache->GetPipelineCache()); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), vs_mod, nullptr); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs_mod, nullptr); - if (!stage.pipeline) - { - Log_ErrorPrintf("Failed to compile one or more post-processing pipelines, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), stage.pipeline, (shader.GetName() + "Pipeline").c_str()); - - m_post_processing_stages.push_back(std::move(stage)); - } - - constexpr u32 UBO_SIZE = 1 * 1024 * 1024; - if (!only_use_push_constants && m_post_processing_ubo.GetCurrentSize() < UBO_SIZE && - !m_post_processing_ubo.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UBO_SIZE)) - { - Log_ErrorPrintf("Failed to allocate %u byte uniform buffer for postprocessing", UBO_SIZE); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_post_processing_ubo.GetBuffer(), - "Post Processing Uniform Buffer"); - m_post_processing_timer.Reset(); - return true; -} - -bool VulkanHostDisplay::CheckPostProcessingRenderTargets(u32 target_width, u32 target_height) -{ - DebugAssert(!m_post_processing_stages.empty()); - - if (m_post_processing_input_texture.GetWidth() != target_width || - m_post_processing_input_texture.GetHeight() != target_height) - { - if (m_post_processing_input_framebuffer != VK_NULL_HANDLE) - { - g_vulkan_context->DeferFramebufferDestruction(m_post_processing_input_framebuffer); - m_post_processing_input_framebuffer = VK_NULL_HANDLE; - } - - if (!m_post_processing_input_texture.Create(target_width, target_height, 1, 1, m_swap_chain->GetTextureFormat(), - 
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT) || - (m_post_processing_input_framebuffer = - m_post_processing_input_texture.CreateFramebuffer(GetRenderPassForDisplay())) == VK_NULL_HANDLE) - { - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_post_processing_input_texture.GetImage(), - "Post Processing Input Texture"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_post_processing_input_texture.GetView(), - "Post Processing Input Texture View"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_post_processing_input_texture.GetAllocation(), - "Post Processing Input Texture Memory"); - } - - const u32 target_count = (static_cast(m_post_processing_stages.size()) - 1); - for (u32 i = 0; i < target_count; i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) - { - if (pps.output_framebuffer != VK_NULL_HANDLE) - { - g_vulkan_context->DeferFramebufferDestruction(pps.output_framebuffer); - pps.output_framebuffer = VK_NULL_HANDLE; - } - - if (!pps.output_texture.Create(target_width, target_height, 1, 1, m_swap_chain->GetTextureFormat(), - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT) || - (pps.output_framebuffer = pps.output_texture.CreateFramebuffer(GetRenderPassForDisplay())) == VK_NULL_HANDLE) - { - return false; - } - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), pps.output_texture.GetImage(), - "Post Processing Output Texture %u", i); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), pps.output_texture.GetAllocation(), - "Post Processing Output Texture Memory %u", i); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), pps.output_texture.GetView(), - "Post Processing Output 
Texture View %u", i); - } - } - - return true; -} - -void VulkanHostDisplay::ApplyPostProcessingChain(VkFramebuffer target_fb, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, Vulkan::Texture* texture, - s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, - s32 texture_view_height, u32 target_width, u32 target_height) -{ - VkCommandBuffer cmdbuffer = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope post_scope(cmdbuffer, "VulkanHostDisplay::ApplyPostProcessingChain"); - - if (!CheckPostProcessingRenderTargets(target_width, target_height)) - { - BeginSwapChainRenderPass(target_fb, target_width, target_height); - RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - return; - } - - // downsample/upsample - use same viewport for remainder - m_post_processing_input_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - BeginSwapChainRenderPass(m_post_processing_input_framebuffer, target_width, target_height); - RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - vkCmdEndRenderPass(cmdbuffer); - Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer()); - m_post_processing_input_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - const s32 orig_texture_width = texture_view_width; - const s32 orig_texture_height = texture_view_height; - texture = &m_post_processing_input_texture; - texture_view_x = final_left; - texture_view_y = final_top; - texture_view_width = final_width; - texture_view_height = final_height; - - const u32 final_stage = static_cast(m_post_processing_stages.size()) - 1u; - for (u32 i = 0; i < static_cast(m_post_processing_stages.size()); i++) - { - PostProcessingStage& pps = 
m_post_processing_stages[i]; - const Vulkan::Util::DebugScope stage_scope(g_vulkan_context->GetCurrentCommandBuffer(), "Post Processing Stage: %s", - m_post_processing_chain.GetShaderStage(i).GetName().c_str()); - - if (i != final_stage) - { - pps.output_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - BeginSwapChainRenderPass(pps.output_framebuffer, target_width, target_height); - } - else - { - BeginSwapChainRenderPass(target_fb, target_width, target_height); - } - - const bool use_push_constants = m_post_processing_chain.GetShaderStage(i).UsePushConstants(); - VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet( - use_push_constants ? m_post_process_descriptor_set_layout : m_post_process_ubo_descriptor_set_layout); - if (ds == VK_NULL_HANDLE) - { - Log_ErrorPrintf("Skipping rendering display because of no descriptor set"); - return; - } - - Vulkan::DescriptorSetUpdateBuilder dsupdate; - dsupdate.AddCombinedImageSamplerDescriptorWrite(ds, 1, texture->GetView(), m_border_sampler, texture->GetLayout()); - - if (use_push_constants) - { - u8 buffer[FrontendCommon::PostProcessingShader::PUSH_CONSTANT_SIZE_THRESHOLD]; - Assert(pps.uniforms_size <= sizeof(buffer)); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - buffer, texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, - texture_view_height, GetWindowWidth(), GetWindowHeight(), orig_texture_width, orig_texture_height, - static_cast(m_post_processing_timer.GetTimeSeconds())); - - vkCmdPushConstants(cmdbuffer, m_post_process_pipeline_layout, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, pps.uniforms_size, buffer); - - dsupdate.Update(g_vulkan_context->GetDevice()); - vkCmdBindDescriptorSets(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_post_process_pipeline_layout, 0, 1, &ds, 0, - nullptr); - } - else - { - if (!m_post_processing_ubo.ReserveMemory(pps.uniforms_size, - 
static_cast(g_vulkan_context->GetUniformBufferAlignment()))) - { - Panic("Failed to reserve space in post-processing UBO"); - } - - const u32 offset = m_post_processing_ubo.GetCurrentOffset(); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - m_post_processing_ubo.GetCurrentHostPointer(), texture->GetWidth(), texture->GetHeight(), texture_view_x, - texture_view_y, texture_view_width, texture_view_height, GetWindowWidth(), GetWindowHeight(), - orig_texture_width, orig_texture_height, static_cast(m_post_processing_timer.GetTimeSeconds())); - m_post_processing_ubo.CommitMemory(pps.uniforms_size); - - dsupdate.AddBufferDescriptorWrite(ds, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - m_post_processing_ubo.GetBuffer(), 0, pps.uniforms_size); - dsupdate.Update(g_vulkan_context->GetDevice()); - vkCmdBindDescriptorSets(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_post_process_ubo_pipeline_layout, 0, 1, &ds, - 1, &offset); - } - - vkCmdBindPipeline(cmdbuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pps.pipeline); - - vkCmdDraw(cmdbuffer, 3, 1, 0, 0); - - if (i != final_stage) - { - vkCmdEndRenderPass(cmdbuffer); - Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer()); - pps.output_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - texture = &pps.output_texture; - } - } -} diff --git a/src/util/vulkan_host_display.h b/src/util/vulkan_host_display.h deleted file mode 100644 index 3733c12c6..000000000 --- a/src/util/vulkan_host_display.h +++ /dev/null @@ -1,144 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin -// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) - -#pragma once -#include "common/timer.h" -#include "common/vulkan/loader.h" -#include "common/vulkan/stream_buffer.h" -#include "common/vulkan/swap_chain.h" -#include "common/window_info.h" -#include "host_display.h" -#include "postprocessing_chain.h" -#include -#include - -namespace Vulkan { -class StreamBuffer; -class SwapChain; -} // 
namespace Vulkan - -class VulkanHostDisplay final : public HostDisplay -{ -public: - VulkanHostDisplay(); - ~VulkanHostDisplay(); - - RenderAPI GetRenderAPI() const override; - void* GetDevice() const override; - void* GetContext() const override; - - bool HasDevice() const override; - bool HasSurface() const override; - - bool CreateDevice(const WindowInfo& wi, bool vsync) override; - bool SetupDevice() override; - - bool MakeCurrent() override; - bool DoneCurrent() override; - - bool ChangeWindow(const WindowInfo& new_wi) override; - void ResizeWindow(s32 new_window_width, s32 new_window_height) override; - bool SupportsFullscreen() const override; - bool IsFullscreen() override; - bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; - AdapterAndModeList GetAdapterAndModeList() override; - void DestroySurface() override; - - bool SetPostProcessingChain(const std::string_view& config) override; - - std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Format format, const void* data, u32 data_stride, - bool dynamic = false) override; - bool BeginTextureUpdate(GPUTexture* texture, u32 width, u32 height, void** out_buffer, u32* out_pitch) override; - void EndTextureUpdate(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height) override; - bool UpdateTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; - bool SupportsTextureFormat(GPUTexture::Format format) const override; - - void SetVSync(bool enabled) override; - - bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; - - bool SetGPUTimingEnabled(bool enabled) override; - float 
GetAndResetAccumulatedGPUTime() override; - - static AdapterAndModeList StaticGetAdapterAndModeList(const WindowInfo* wi); - -protected: - struct PushConstants - { - float src_rect_left; - float src_rect_top; - float src_rect_width; - float src_rect_height; - }; - - struct PostProcessingStage - { - PostProcessingStage() = default; - PostProcessingStage(PostProcessingStage&& move); - ~PostProcessingStage(); - - VkPipeline pipeline = VK_NULL_HANDLE; - VkFramebuffer output_framebuffer = VK_NULL_HANDLE; - Vulkan::Texture output_texture; - u32 uniforms_size = 0; - }; - - bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height); - void ApplyPostProcessingChain(VkFramebuffer target_fb, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, Vulkan::Texture* texture, s32 texture_view_x, s32 texture_view_y, - s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height); - - VkRenderPass GetRenderPassForDisplay() const; - - bool CheckStagingBufferSize(u32 required_size); - void DestroyStagingBuffer(); - - bool CreateResources() override; - void DestroyResources() override; - - bool CreateImGuiContext() override; - void DestroyImGuiContext() override; - bool UpdateImGuiFontTexture() override; - - void BeginSwapChainRenderPass(VkFramebuffer framebuffer, u32 width, u32 height); - void RenderDisplay(); - void RenderImGui(); - void RenderSoftwareCursor(); - - void RenderDisplay(s32 left, s32 top, s32 width, s32 height, Vulkan::Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter); - void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture_handle); - - std::unique_ptr m_swap_chain; - - VkDescriptorSetLayout m_descriptor_set_layout = VK_NULL_HANDLE; - VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; - VkPipeline m_cursor_pipeline = VK_NULL_HANDLE; - VkPipeline m_display_pipeline = VK_NULL_HANDLE; - VkSampler 
m_point_sampler = VK_NULL_HANDLE; - VkSampler m_linear_sampler = VK_NULL_HANDLE; - VkSampler m_border_sampler = VK_NULL_HANDLE; - - VmaAllocation m_readback_staging_allocation = VK_NULL_HANDLE; - VkBuffer m_readback_staging_buffer = VK_NULL_HANDLE; - u8* m_readback_staging_buffer_map = nullptr; - u32 m_readback_staging_buffer_size = 0; - bool m_is_adreno = false; - - VkDescriptorSetLayout m_post_process_descriptor_set_layout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_post_process_ubo_descriptor_set_layout = VK_NULL_HANDLE; - VkPipelineLayout m_post_process_pipeline_layout = VK_NULL_HANDLE; - VkPipelineLayout m_post_process_ubo_pipeline_layout = VK_NULL_HANDLE; - - FrontendCommon::PostProcessingChain m_post_processing_chain; - Vulkan::Texture m_post_processing_input_texture; - VkFramebuffer m_post_processing_input_framebuffer = VK_NULL_HANDLE; - Vulkan::StreamBuffer m_post_processing_ubo; - std::vector m_post_processing_stages; - Common::Timer m_post_processing_timer; -}; diff --git a/src/common/vulkan/loader.cpp b/src/util/vulkan_loader.cpp similarity index 55% rename from src/common/vulkan/loader.cpp rename to src/util/vulkan_loader.cpp index 10718f202..065607dc9 100644 --- a/src/common/vulkan/loader.cpp +++ b/src/util/vulkan_loader.cpp @@ -1,17 +1,20 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -#include +// No better place for this.. 
+#define VMA_IMPLEMENTATION + +#include "vulkan_loader.h" + +#include "common/assert.h" +#include "common/log.h" + #include #include #include #include #include -#define VMA_IMPLEMENTATION 1 - -#include "loader.h" - #ifndef _WIN32 #include #endif @@ -20,24 +23,25 @@ #include #endif +Log_SetChannel(VulkanDevice); + extern "C" { -#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name ds_##name; -#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name ds_##name; -#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name ds_##name; -#include "entry_points.inl" +#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name name; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name name; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name name; +#include "vulkan_entry_points.inl" #undef VULKAN_DEVICE_ENTRY_POINT #undef VULKAN_INSTANCE_ENTRY_POINT #undef VULKAN_MODULE_ENTRY_POINT } -namespace Vulkan { -void ResetVulkanLibraryFunctionPointers() +void Vulkan::ResetVulkanLibraryFunctionPointers() { -#define VULKAN_MODULE_ENTRY_POINT(name, required) ds_##name = nullptr; -#define VULKAN_INSTANCE_ENTRY_POINT(name, required) ds_##name = nullptr; -#define VULKAN_DEVICE_ENTRY_POINT(name, required) ds_##name = nullptr; -#include "entry_points.inl" +#define VULKAN_MODULE_ENTRY_POINT(name, required) name = nullptr; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) name = nullptr; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) name = nullptr; +#include "vulkan_entry_points.inl" #undef VULKAN_DEVICE_ENTRY_POINT #undef VULKAN_INSTANCE_ENTRY_POINT #undef VULKAN_MODULE_ENTRY_POINT @@ -45,81 +49,76 @@ void ResetVulkanLibraryFunctionPointers() #if defined(_WIN32) -static HMODULE vulkan_module; -static std::atomic_int vulkan_module_ref_count = {0}; +static HMODULE s_vulkan_module; -bool LoadVulkanLibrary() +bool Vulkan::IsVulkanLibraryLoaded() { - // Not thread safe if a second thread calls the loader whilst the first is still in-progress. 
- if (vulkan_module) - { - vulkan_module_ref_count++; - return true; - } + return s_vulkan_module != NULL; +} - vulkan_module = LoadLibraryA("vulkan-1.dll"); - if (!vulkan_module) +bool Vulkan::LoadVulkanLibrary() +{ + AssertMsg(!s_vulkan_module, "Vulkan module is not loaded."); + + s_vulkan_module = LoadLibraryA("vulkan-1.dll"); + if (!s_vulkan_module) { - std::fprintf(stderr, "Failed to load vulkan-1.dll\n"); + Log_ErrorPrintf("Failed to load vulkan-1.dll"); return false; } bool required_functions_missing = false; auto LoadFunction = [&](FARPROC* func_ptr, const char* name, bool is_required) { - *func_ptr = GetProcAddress(vulkan_module, name); + *func_ptr = GetProcAddress(s_vulkan_module, name); if (!(*func_ptr) && is_required) { - std::fprintf(stderr, "Vulkan: Failed to load required module function %s\n", name); + Log_ErrorPrintf("Vulkan: Failed to load required module function %s", name); required_functions_missing = true; } }; #define VULKAN_MODULE_ENTRY_POINT(name, required) LoadFunction(reinterpret_cast(&name), #name, required); -#include "entry_points.inl" +#include "vulkan_entry_points.inl" #undef VULKAN_MODULE_ENTRY_POINT if (required_functions_missing) { ResetVulkanLibraryFunctionPointers(); - FreeLibrary(vulkan_module); - vulkan_module = nullptr; + FreeLibrary(s_vulkan_module); + s_vulkan_module = nullptr; return false; } - vulkan_module_ref_count++; return true; } -void UnloadVulkanLibrary() +void Vulkan::UnloadVulkanLibrary() { - if ((--vulkan_module_ref_count) > 0) - return; - ResetVulkanLibraryFunctionPointers(); - FreeLibrary(vulkan_module); - vulkan_module = nullptr; + if (s_vulkan_module) + FreeLibrary(s_vulkan_module); + s_vulkan_module = nullptr; } #else -static void* vulkan_module; -static std::atomic_int vulkan_module_ref_count = {0}; +static void* s_vulkan_module; -bool LoadVulkanLibrary() +bool Vulkan::IsVulkanLibraryLoaded() { - // Not thread safe if a second thread calls the loader whilst the first is still in-progress. 
- if (vulkan_module) - { - vulkan_module_ref_count++; - return true; - } + return s_vulkan_module != nullptr; +} + +bool Vulkan::LoadVulkanLibrary() +{ + AssertMsg(!s_vulkan_module, "Vulkan module is not loaded."); #if defined(__APPLE__) // Check if a path to a specific Vulkan library has been specified. char* libvulkan_env = getenv("LIBVULKAN_PATH"); if (libvulkan_env) - vulkan_module = dlopen(libvulkan_env, RTLD_NOW); - if (!vulkan_module) + s_vulkan_module = dlopen(libvulkan_env, RTLD_NOW); + if (!s_vulkan_module) { unsigned path_size = 0; _NSGetExecutablePath(nullptr, &path_size); @@ -133,79 +132,66 @@ bool LoadVulkanLibrary() if (pos != std::string::npos) { path.erase(pos); - path += "/../Frameworks/libvulkan.dylib"; - vulkan_module = dlopen(path.c_str(), RTLD_NOW); - if (!vulkan_module) - { - path.erase(pos); - path += "/../Frameworks/libMoltenVK.dylib"; - vulkan_module = dlopen(path.c_str(), RTLD_NOW); - } + path += "/../Frameworks/libMoltenVK.dylib"; + s_vulkan_module = dlopen(path.c_str(), RTLD_NOW); } } } - if (!vulkan_module) - { - vulkan_module = dlopen("libvulkan.dylib", RTLD_NOW); - if (!vulkan_module) - vulkan_module = dlopen("libMoltenVK.dylib", RTLD_NOW); - } + if (!s_vulkan_module) + s_vulkan_module = dlopen("libvulkan.dylib", RTLD_NOW); #else // Names of libraries to search. Desktop should use libvulkan.so.1 or libvulkan.so. 
static const char* search_lib_names[] = {"libvulkan.so.1", "libvulkan.so"}; for (size_t i = 0; i < sizeof(search_lib_names) / sizeof(search_lib_names[0]); i++) { - vulkan_module = dlopen(search_lib_names[i], RTLD_NOW); - if (vulkan_module) + s_vulkan_module = dlopen(search_lib_names[i], RTLD_NOW); + if (s_vulkan_module) break; } #endif - if (!vulkan_module) + if (!s_vulkan_module) { - std::fprintf(stderr, "Failed to load or locate libvulkan.so\n"); + Log_ErrorPrintf("Failed to load or locate libvulkan.so"); return false; } bool required_functions_missing = false; auto LoadFunction = [&](void** func_ptr, const char* name, bool is_required) { - *func_ptr = dlsym(vulkan_module, name); + *func_ptr = dlsym(s_vulkan_module, name); if (!(*func_ptr) && is_required) { - std::fprintf(stderr, "Vulkan: Failed to load required module function %s\n", name); + Log_ErrorPrintf("Vulkan: Failed to load required module function %s", name); required_functions_missing = true; } }; #define VULKAN_MODULE_ENTRY_POINT(name, required) LoadFunction(reinterpret_cast(&name), #name, required); -#include "entry_points.inl" +#include "vulkan_entry_points.inl" #undef VULKAN_MODULE_ENTRY_POINT if (required_functions_missing) { ResetVulkanLibraryFunctionPointers(); - dlclose(vulkan_module); - vulkan_module = nullptr; + dlclose(s_vulkan_module); + s_vulkan_module = nullptr; return false; } - vulkan_module_ref_count++; return true; } -void UnloadVulkanLibrary() +void Vulkan::UnloadVulkanLibrary() { - if ((--vulkan_module_ref_count) > 0) - return; - ResetVulkanLibraryFunctionPointers(); - dlclose(vulkan_module); - vulkan_module = nullptr; + if (s_vulkan_module) + dlclose(s_vulkan_module); + s_vulkan_module = nullptr; } #endif -bool LoadVulkanInstanceFunctions(VkInstance instance) +bool Vulkan::LoadVulkanInstanceFunctions(VkInstance instance) { bool required_functions_missing = false; auto LoadFunction = [&](PFN_vkVoidFunction* func_ptr, const char* name, bool is_required) { @@ -219,13 +205,13 @@ bool 
LoadVulkanInstanceFunctions(VkInstance instance) #define VULKAN_INSTANCE_ENTRY_POINT(name, required) \ LoadFunction(reinterpret_cast(&name), #name, required); -#include "entry_points.inl" +#include "vulkan_entry_points.inl" #undef VULKAN_INSTANCE_ENTRY_POINT return !required_functions_missing; } -bool LoadVulkanDeviceFunctions(VkDevice device) +bool Vulkan::LoadVulkanDeviceFunctions(VkDevice device) { bool required_functions_missing = false; auto LoadFunction = [&](PFN_vkVoidFunction* func_ptr, const char* name, bool is_required) { @@ -239,10 +225,8 @@ bool LoadVulkanDeviceFunctions(VkDevice device) #define VULKAN_DEVICE_ENTRY_POINT(name, required) \ LoadFunction(reinterpret_cast(&name), #name, required); -#include "entry_points.inl" +#include "vulkan_entry_points.inl" #undef VULKAN_DEVICE_ENTRY_POINT return !required_functions_missing; } - -} // namespace Vulkan \ No newline at end of file diff --git a/src/common/vulkan/loader.h b/src/util/vulkan_loader.h similarity index 78% rename from src/common/vulkan/loader.h rename to src/util/vulkan_loader.h index c6a6b4acd..70d4a951e 100644 --- a/src/common/vulkan/loader.h +++ b/src/util/vulkan_loader.h @@ -1,49 +1,40 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once #define VK_NO_PROTOTYPES -#if defined(WIN32) - +#ifdef _WIN32 #define VK_USE_PLATFORM_WIN32_KHR // vulkan.h pulls in windows.h on Windows, so we need to include our replacement header first -#include "../windows_headers.h" - -#endif - -#if defined(USE_X11) +#include "common/windows_headers.h" +#elif defined(__APPLE__) +#define VK_USE_PLATFORM_METAL_EXT +#elif defined(__ANDROID__) +#define VK_USE_PLATFORM_ANDROID_KHR +#else +#ifdef USE_X11 #define VK_USE_PLATFORM_XLIB_KHR #endif -#if defined(USE_WAYLAND) +#ifdef USE_WAYLAND #define VK_USE_PLATFORM_WAYLAND_KHR #endif - -#if defined(ANDROID) -#define VK_USE_PLATFORM_ANDROID_KHR 
-#endif - -#if defined(__APPLE__) -// #define VK_USE_PLATFORM_MACOS_MVK -#define VK_USE_PLATFORM_METAL_EXT #endif #include "vulkan/vulkan.h" -// Currently, exclusive fullscreen is only supported on Windows. -#if defined(WIN32) -#define SUPPORTS_VULKAN_EXCLUSIVE_FULLSCREEN 1 -#endif - #if defined(USE_X11) // This breaks a bunch of our code. They shouldn't be #defines in the first place. #ifdef None #undef None #endif +#ifdef Always +#undef Always +#endif #ifdef Status #undef Status #endif @@ -77,12 +68,13 @@ #endif -#include "entry_points.h" +#include "vulkan_entry_points.h" // We include vk_mem_alloc globally, so we don't accidentally include it before the vulkan header somewhere. -#if defined(__clang__) +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnullability-completeness" +#pragma clang diagnostic ignored "-Wunused-variable" #elif defined(_MSC_VER) #pragma warning(push, 0) #endif @@ -92,18 +84,17 @@ #define VMA_STATS_STRING_ENABLED 0 #include "vulkan/vk_mem_alloc.h" -#if defined(__clang__) +#ifdef __clang__ #pragma clang diagnostic pop #elif defined(_MSC_VER) #pragma warning(pop) #endif namespace Vulkan { - +bool IsVulkanLibraryLoaded(); bool LoadVulkanLibrary(); bool LoadVulkanInstanceFunctions(VkInstance instance); bool LoadVulkanDeviceFunctions(VkDevice device); void UnloadVulkanLibrary(); void ResetVulkanLibraryFunctionPointers(); - } // namespace Vulkan diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp new file mode 100644 index 000000000..fdb5dbaa4 --- /dev/null +++ b/src/util/vulkan_pipeline.cpp @@ -0,0 +1,210 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "vulkan_pipeline.h" +#include "spirv_compiler.h" +#include "vulkan_builders.h" +#include "vulkan_device.h" + +#include "common/assert.h" +#include "common/log.h" + +Log_SetChannel(VulkanDevice); + +static u32 s_next_bad_shader_id = 1; + 
+VulkanShader::VulkanShader(GPUShaderStage stage, VkShaderModule mod) : GPUShader(stage), m_module(mod) +{ +} + +VulkanShader::~VulkanShader() +{ + vkDestroyShaderModule(VulkanDevice::GetInstance().GetVulkanDevice(), m_module, nullptr); +} + +void VulkanShader::SetDebugName(const std::string_view& name) +{ + Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_module, name); +} + +std::unique_ptr VulkanDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) +{ + VkShaderModule mod; + + const VkShaderModuleCreateInfo ci = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, nullptr, 0, data.size(), + reinterpret_cast(data.data())}; + VkResult res = vkCreateShaderModule(m_device, &ci, nullptr, &mod); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateShaderModule() failed: "); + return {}; + } + + return std::unique_ptr(new VulkanShader(stage, mod)); +} + +std::unique_ptr VulkanDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + const char* entry_point, DynamicHeapArray* out_binary) +{ + if (std::strcmp(entry_point, "main") != 0) + { + Log_ErrorPrintf("Entry point must be 'main', but got '%s' instead.", entry_point); + return {}; + } + + const u32 options = (m_debug_device ? 
SPIRVCompiler::DebugInfo : 0) | SPIRVCompiler::VulkanRules; + + std::optional spirv = SPIRVCompiler::CompileShader(stage, source, options); + if (!spirv.has_value()) + { + Log_ErrorPrintf("Failed to compile shader to SPIR-V."); + return {}; + } + + const size_t spirv_size = spirv->size() * sizeof(SPIRVCompiler::SPIRVCodeType); + if (out_binary) + { + out_binary->resize(spirv_size); + std::memcpy(out_binary->data(), spirv->data(), spirv_size); + } + + return CreateShaderFromBinary(stage, gsl::span(reinterpret_cast(spirv->data()), spirv_size)); +} + +////////////////////////////////////////////////////////////////////////// + +VulkanPipeline::VulkanPipeline(VkPipeline pipeline, Layout layout) + : GPUPipeline(), m_pipeline(pipeline), m_layout(layout) +{ +} + +VulkanPipeline::~VulkanPipeline() +{ + VulkanDevice::GetInstance().DeferPipelineDestruction(m_pipeline); +} + +void VulkanPipeline::SetDebugName(const std::string_view& name) +{ + Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_pipeline, name); +} + +std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config) +{ + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = {{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // Points + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, // Lines + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, // Triangles + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // TriangleStrips + }}; + + static constexpr u32 MAX_COMPONENTS = 4; + static constexpr const VkFormat format_mapping[static_cast( + GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { + {VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT}, // Float + {VK_FORMAT_R8_UINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8G8B8_UINT, VK_FORMAT_R8G8B8A8_UINT}, // UInt8 + {VK_FORMAT_R8_SINT, VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8B8_SINT, VK_FORMAT_R8G8B8A8_SINT}, // SInt8 + {VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8B8_UNORM, 
VK_FORMAT_R8G8B8A8_UNORM}, // UNorm8 + {VK_FORMAT_R16_UINT, VK_FORMAT_R16G16_UINT, VK_FORMAT_R16G16B16_UINT, VK_FORMAT_R16G16B16A16_UINT}, // UInt16 + {VK_FORMAT_R16_SINT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16G16B16_SINT, VK_FORMAT_R16G16B16A16_SINT}, // SInt16 + {VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16B16_UNORM, VK_FORMAT_R16G16B16A16_UNORM}, // UNorm16 + {VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT, VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT}, // UInt32 + {VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT, VK_FORMAT_R32G32B32A32_SINT}, // SInt32 + }; + + static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ + VK_CULL_MODE_NONE, // None + VK_CULL_MODE_FRONT_BIT, // Front + VK_CULL_MODE_BACK_BIT, // Back + }}; + + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> compare_mapping = {{ + VK_COMPARE_OP_NEVER, // Never + VK_COMPARE_OP_ALWAYS, // Always + VK_COMPARE_OP_LESS, // Less + VK_COMPARE_OP_LESS_OR_EQUAL, // LessEqual + VK_COMPARE_OP_GREATER, // Greater + VK_COMPARE_OP_GREATER_OR_EQUAL, // GreaterEqual + VK_COMPARE_OP_EQUAL, // Equal + }}; + + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + VK_BLEND_FACTOR_ZERO, // Zero + VK_BLEND_FACTOR_ONE, // One + VK_BLEND_FACTOR_SRC_COLOR, // SrcColor + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, // InvSrcColor + VK_BLEND_FACTOR_DST_COLOR, // DstColor + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, // InvDstColor + VK_BLEND_FACTOR_SRC_ALPHA, // SrcAlpha + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, // InvSrcAlpha + VK_BLEND_FACTOR_SRC1_ALPHA, // SrcAlpha1 + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, // InvSrcAlpha1 + VK_BLEND_FACTOR_DST_ALPHA, // DstAlpha + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, // InvDstAlpha + VK_BLEND_FACTOR_CONSTANT_ALPHA, // ConstantAlpha + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, // InvConstantAlpha + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + VK_BLEND_OP_ADD, // 
Add + VK_BLEND_OP_SUBTRACT, // Subtract + VK_BLEND_OP_REVERSE_SUBTRACT, // ReverseSubtract + VK_BLEND_OP_MIN, // Min + VK_BLEND_OP_MAX, // Max + }}; + + Vulkan::GraphicsPipelineBuilder gpb; + gpb.SetVertexShader(static_cast(config.vertex_shader)->GetModule()); + gpb.SetFragmentShader(static_cast(config.fragment_shader)->GetModule()); + + if (!config.input_layout.vertex_attributes.empty()) + { + gpb.AddVertexBuffer(0, config.input_layout.vertex_stride); + for (u32 i = 0; i < static_cast(config.input_layout.vertex_attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; + DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); + gpb.AddVertexAttribute( + i, 0, format_mapping[static_cast(va.type.GetValue())][static_cast(va.components.GetValue() - 1)], + va.offset); + } + gpb.SetPrimitiveTopology(primitives[static_cast(config.primitive)]); + } + + // Line width? + + gpb.SetRasterizationState(VK_POLYGON_MODE_FILL, + cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())], + VK_FRONT_FACE_CLOCKWISE); + if (config.samples > 1) + gpb.SetMultisamples(config.samples, config.per_sample_shading); + gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write, + config.depth.depth_write, compare_mapping[static_cast(config.depth.depth_test.GetValue())]); + gpb.SetNoStencilState(); + + gpb.SetBlendAttachment(0, config.blend.enable, blend_mapping[static_cast(config.blend.src_blend.GetValue())], + blend_mapping[static_cast(config.blend.dst_blend.GetValue())], + op_mapping[static_cast(config.blend.blend_op.GetValue())], + blend_mapping[static_cast(config.blend.src_alpha_blend.GetValue())], + blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())], + op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())], config.blend.write_mask); + + const auto blend_constants = config.blend.GetConstantFloatColor(); + 
gpb.SetBlendConstants(blend_constants[0], blend_constants[1], blend_constants[2], blend_constants[3]); + + gpb.AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT); + gpb.AddDynamicState(VK_DYNAMIC_STATE_SCISSOR); + + gpb.SetPipelineLayout(m_pipeline_layouts[static_cast(config.layout)]); + + const VkRenderPass render_pass = GetRenderPass(TEXTURE_FORMAT_MAPPING[static_cast(config.color_format)], + TEXTURE_FORMAT_MAPPING[static_cast(config.depth_format)], + static_cast(config.samples)); + DebugAssert(render_pass); + gpb.SetRenderPass(render_pass, 0); + + const VkPipeline pipeline = gpb.Create(m_device, m_pipeline_cache, false); + if (!pipeline) + return {}; + + return std::unique_ptr(new VulkanPipeline(pipeline, config.layout)); +} diff --git a/src/util/vulkan_pipeline.h b/src/util/vulkan_pipeline.h new file mode 100644 index 000000000..384cf288f --- /dev/null +++ b/src/util/vulkan_pipeline.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gpu_device.h" +#include "vulkan_loader.h" + +class VulkanDevice; + +class VulkanShader final : public GPUShader +{ + friend VulkanDevice; + +public: + ~VulkanShader() override; + + ALWAYS_INLINE VkShaderModule GetModule() const { return m_module; } + + void SetDebugName(const std::string_view& name) override; + +private: + VulkanShader(GPUShaderStage stage, VkShaderModule mod); + + VkShaderModule m_module; +}; + +class VulkanPipeline final : public GPUPipeline +{ + friend VulkanDevice; + +public: + ~VulkanPipeline() override; + + ALWAYS_INLINE VkPipeline GetPipeline() const { return m_pipeline; } + ALWAYS_INLINE Layout GetLayout() const { return m_layout; } + + void SetDebugName(const std::string_view& name) override; + +private: + VulkanPipeline(VkPipeline pipeline, Layout layout); + + VkPipeline m_pipeline; + Layout m_layout; +}; diff --git a/src/common/vulkan/stream_buffer.cpp b/src/util/vulkan_stream_buffer.cpp similarity index 84% rename from 
src/common/vulkan/stream_buffer.cpp rename to src/util/vulkan_stream_buffer.cpp index 387ab1f44..f9bef2675 100644 --- a/src/common/vulkan/stream_buffer.cpp +++ b/src/util/vulkan_stream_buffer.cpp @@ -1,18 +1,19 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) -#include "stream_buffer.h" -#include "../align.h" -#include "../assert.h" -#include "../log.h" -#include "context.h" -#include "util.h" -Log_SetChannel(Vulkan::StreamBuffer); +#include "vulkan_stream_buffer.h" +#include "vulkan_builders.h" +#include "vulkan_device.h" -namespace Vulkan { -StreamBuffer::StreamBuffer() = default; +#include "common/align.h" +#include "common/assert.h" +#include "common/bitutils.h" +#include "common/log.h" +Log_SetChannel(VulkanDevice); -StreamBuffer::StreamBuffer(StreamBuffer&& move) +VulkanStreamBuffer::VulkanStreamBuffer() = default; + +VulkanStreamBuffer::VulkanStreamBuffer(VulkanStreamBuffer&& move) : m_size(move.m_size), m_current_offset(move.m_current_offset), m_current_space(move.m_current_space), m_current_gpu_position(move.m_current_gpu_position), m_allocation(move.m_allocation), m_buffer(move.m_buffer), m_host_pointer(move.m_host_pointer), m_tracked_fences(std::move(move.m_tracked_fences)) @@ -26,13 +27,13 @@ StreamBuffer::StreamBuffer(StreamBuffer&& move) move.m_host_pointer = nullptr; } -StreamBuffer::~StreamBuffer() +VulkanStreamBuffer::~VulkanStreamBuffer() { if (IsValid()) Destroy(true); } -StreamBuffer& StreamBuffer::operator=(StreamBuffer&& move) +VulkanStreamBuffer& VulkanStreamBuffer::operator=(VulkanStreamBuffer&& move) { if (IsValid()) Destroy(true); @@ -48,7 +49,7 @@ StreamBuffer& StreamBuffer::operator=(StreamBuffer&& move) return *this; } -bool StreamBuffer::Create(VkBufferUsageFlags usage, u32 size) +bool VulkanStreamBuffer::Create(VkBufferUsageFlags usage, u32 size) { const VkBufferCreateInfo bci = 
{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, @@ -67,7 +68,8 @@ bool StreamBuffer::Create(VkBufferUsageFlags usage, u32 size) VmaAllocationInfo ai = {}; VkBuffer new_buffer = VK_NULL_HANDLE; VmaAllocation new_allocation = VK_NULL_HANDLE; - VkResult res = vmaCreateBuffer(g_vulkan_context->GetAllocator(), &bci, &aci, &new_buffer, &new_allocation, &ai); + VkResult res = + vmaCreateBuffer(VulkanDevice::GetInstance().GetAllocator(), &bci, &aci, &new_buffer, &new_allocation, &ai); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: "); @@ -88,14 +90,14 @@ bool StreamBuffer::Create(VkBufferUsageFlags usage, u32 size) return true; } -void StreamBuffer::Destroy(bool defer) +void VulkanStreamBuffer::Destroy(bool defer) { if (m_buffer != VK_NULL_HANDLE) { if (defer) - g_vulkan_context->DeferBufferDestruction(m_buffer, m_allocation); + VulkanDevice::GetInstance().DeferBufferDestruction(m_buffer, m_allocation); else - vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_buffer, m_allocation); + vmaDestroyBuffer(VulkanDevice::GetInstance().GetAllocator(), m_buffer, m_allocation); } m_size = 0; @@ -107,7 +109,7 @@ void StreamBuffer::Destroy(bool defer) m_host_pointer = nullptr; } -bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) +bool VulkanStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) { const u32 required_bytes = num_bytes + alignment; @@ -176,23 +178,23 @@ bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) return false; } -void StreamBuffer::CommitMemory(u32 final_num_bytes) +void VulkanStreamBuffer::CommitMemory(u32 final_num_bytes) { DebugAssert((m_current_offset + final_num_bytes) <= m_size); DebugAssert(final_num_bytes <= m_current_space); // For non-coherent mappings, flush the memory range - vmaFlushAllocation(g_vulkan_context->GetAllocator(), m_allocation, m_current_offset, final_num_bytes); + vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), m_allocation, m_current_offset, final_num_bytes); 
m_current_offset += final_num_bytes; m_current_space -= final_num_bytes; UpdateCurrentFencePosition(); } -void StreamBuffer::UpdateCurrentFencePosition() +void VulkanStreamBuffer::UpdateCurrentFencePosition() { // Has the offset changed since the last fence? - const u64 counter = g_vulkan_context->GetCurrentFenceCounter(); + const u64 counter = VulkanDevice::GetInstance().GetCurrentFenceCounter(); if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) { // Still haven't executed a command buffer, so just update the offset. @@ -204,12 +206,12 @@ void StreamBuffer::UpdateCurrentFencePosition() m_tracked_fences.emplace_back(counter, m_current_offset); } -void StreamBuffer::UpdateGPUPosition() +void VulkanStreamBuffer::UpdateGPUPosition() { auto start = m_tracked_fences.begin(); auto end = start; - const u64 completed_counter = g_vulkan_context->GetCompletedFenceCounter(); + const u64 completed_counter = VulkanDevice::GetInstance().GetCompletedFenceCounter(); while (end != m_tracked_fences.end() && completed_counter >= end->first) { m_current_gpu_position = end->second; @@ -229,7 +231,7 @@ void StreamBuffer::UpdateGPUPosition() } } -bool StreamBuffer::WaitForClearSpace(u32 num_bytes) +bool VulkanStreamBuffer::WaitForClearSpace(u32 num_bytes) { u32 new_offset = 0; u32 new_space = 0; @@ -296,16 +298,14 @@ bool StreamBuffer::WaitForClearSpace(u32 num_bytes) // Did any fences satisfy this condition? // Has the command buffer been executed yet? If not, the caller should execute it. - if (iter == m_tracked_fences.end() || iter->first == g_vulkan_context->GetCurrentFenceCounter()) + if (iter == m_tracked_fences.end() || iter->first == VulkanDevice::GetInstance().GetCurrentFenceCounter()) return false; // Wait until this fence is signaled. This will fire the callback, updating the GPU position. 
- g_vulkan_context->WaitForFenceCounter(iter->first); + VulkanDevice::GetInstance().WaitForFenceCounter(iter->first); m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); m_current_offset = new_offset; m_current_space = new_space; m_current_gpu_position = new_gpu_position; return true; } - -} // namespace Vulkan \ No newline at end of file diff --git a/src/common/vulkan/stream_buffer.h b/src/util/vulkan_stream_buffer.h similarity index 72% rename from src/common/vulkan/stream_buffer.h rename to src/util/vulkan_stream_buffer.h index cdff9a7b4..b0ebe9f69 100644 --- a/src/common/vulkan/stream_buffer.h +++ b/src/util/vulkan_stream_buffer.h @@ -1,30 +1,29 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "../types.h" -#include "loader.h" +#include "vulkan_loader.h" + +#include "common/types.h" + #include #include -namespace Vulkan { - -class StreamBuffer +class VulkanStreamBuffer { public: - StreamBuffer(); - StreamBuffer(StreamBuffer&& move); - StreamBuffer(const StreamBuffer&) = delete; - ~StreamBuffer(); + VulkanStreamBuffer(); + VulkanStreamBuffer(VulkanStreamBuffer&& move); + VulkanStreamBuffer(const VulkanStreamBuffer&) = delete; + ~VulkanStreamBuffer(); - StreamBuffer& operator=(StreamBuffer&& move); - StreamBuffer& operator=(const StreamBuffer&) = delete; + VulkanStreamBuffer& operator=(VulkanStreamBuffer&& move); + VulkanStreamBuffer& operator=(const VulkanStreamBuffer&) = delete; ALWAYS_INLINE bool IsValid() const { return (m_buffer != VK_NULL_HANDLE); } ALWAYS_INLINE VkBuffer GetBuffer() const { return m_buffer; } - ALWAYS_INLINE const VkBuffer* GetBufferPointer() const { return &m_buffer; } - ALWAYS_INLINE VmaAllocation GetAllocation() const { return m_allocation; } + ALWAYS_INLINE const VkBuffer* GetBufferPtr() const { return &m_buffer; } 
ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; } ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; } @@ -57,5 +56,3 @@ private: // List of fences and the corresponding positions in the buffer std::deque> m_tracked_fences; }; - -} // namespace Vulkan diff --git a/src/util/vulkan_swap_chain.cpp b/src/util/vulkan_swap_chain.cpp new file mode 100644 index 000000000..88494c431 --- /dev/null +++ b/src/util/vulkan_swap_chain.cpp @@ -0,0 +1,695 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "vulkan_swap_chain.h" +#include "vulkan_builders.h" +#include "vulkan_device.h" + +#include "common/assert.h" +#include "common/log.h" + +#include +#include +#include + +#if defined(VK_USE_PLATFORM_XLIB_KHR) +#include +#endif + +Log_SetChannel(VulkanDevice); + +VulkanSwapChain::VulkanSwapChain(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync, + std::optional exclusive_fullscreen_control) + : m_window_info(wi), m_surface(surface), m_vsync_mode(vsync), + m_exclusive_fullscreen_control(exclusive_fullscreen_control) +{ +} + +VulkanSwapChain::~VulkanSwapChain() +{ + DestroySwapChainImages(); + DestroySwapChain(); + DestroySurface(); +} + +VkSurfaceKHR VulkanSwapChain::CreateVulkanSurface(VkInstance instance, VkPhysicalDevice physical_device, WindowInfo* wi) +{ +#if defined(VK_USE_PLATFORM_WIN32_KHR) + if (wi->type == WindowInfo::Type::Win32) + { + VkWin32SurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkWin32SurfaceCreateFlagsKHR flags + nullptr, // HINSTANCE hinstance + reinterpret_cast(wi->window_handle) // HWND hwnd + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateWin32SurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + 
LOG_VULKAN_ERROR(res, "vkCreateWin32SurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + } +#endif + +#if defined(VK_USE_PLATFORM_METAL_EXT) + if (wi->type == WindowInfo::Type::MacOS) + { +#if 0 + // TODO: FIXME + if (!wi->surface_handle && !CocoaTools::CreateMetalLayer(wi)) + return VK_NULL_HANDLE; + + VkMetalSurfaceCreateInfoEXT surface_create_info = {VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT, nullptr, 0, + static_cast(wi->surface_handle)}; + + VkSurfaceKHR surface; + VkResult res = vkCreateMetalSurfaceEXT(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateMetalSurfaceEXT failed: "); + return VK_NULL_HANDLE; + } + + return surface; +#else + Panic("Fixme"); + return VK_NULL_HANDLE; +#endif + } +#endif + +#if defined(VK_USE_PLATFORM_ANDROID_KHR) + if (wi->type == WindowInfo::Type::Android) + { + VkAndroidSurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAndroidSurfaceCreateFlagsKHR flags + reinterpret_cast(wi->window_handle) // ANativeWindow* window + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateAndroidSurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateAndroidSurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + } +#endif + +#if defined(VK_USE_PLATFORM_XLIB_KHR) + if (wi->type == WindowInfo::Type::X11) + { + VkXlibSurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkXlibSurfaceCreateFlagsKHR flags + static_cast(wi->display_connection), // Display* dpy + reinterpret_cast(wi->window_handle) // Window window + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateXlibSurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + 
LOG_VULKAN_ERROR(res, "vkCreateXlibSurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + } +#endif + +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) + if (wi->type == WindowInfo::Type::Wayland) + { + VkWaylandSurfaceCreateInfoKHR surface_create_info = {VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast(wi->display_connection), + static_cast(wi->window_handle)}; + + VkSurfaceKHR surface; + VkResult res = vkCreateWaylandSurfaceKHR(instance, &surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateWaylandSurfaceEXT failed: "); + return VK_NULL_HANDLE; + } + + return surface; + } +#endif + + return VK_NULL_HANDLE; +} + +void VulkanSwapChain::DestroyVulkanSurface(VkInstance instance, WindowInfo* wi, VkSurfaceKHR surface) +{ + vkDestroySurfaceKHR(VulkanDevice::GetInstance().GetVulkanInstance(), surface, nullptr); + +#if defined(__APPLE__) +#if 0 + if (wi->type == WindowInfo::Type::MacOS && wi->surface_handle) + CocoaTools::DestroyMetalLayer(wi); +#else + Panic("TODO"); +#endif +#endif +} + +std::unique_ptr VulkanSwapChain::Create(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync, + std::optional exclusive_fullscreen_control) +{ + std::unique_ptr swap_chain = + std::unique_ptr(new VulkanSwapChain(wi, surface, vsync, exclusive_fullscreen_control)); + if (!swap_chain->CreateSwapChain()) + return nullptr; + + return swap_chain; +} + +static VkFormat GetLinearFormat(VkFormat format) +{ + switch (format) + { + case VK_FORMAT_R8_SRGB: + return VK_FORMAT_R8_UNORM; + case VK_FORMAT_R8G8_SRGB: + return VK_FORMAT_R8G8_UNORM; + case VK_FORMAT_R8G8B8_SRGB: + return VK_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R8G8B8A8_SRGB: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_B8G8R8_SRGB: + return VK_FORMAT_B8G8R8_UNORM; + case VK_FORMAT_B8G8R8A8_SRGB: + return VK_FORMAT_B8G8R8A8_UNORM; + default: + return format; + } +} + +std::optional VulkanSwapChain::SelectSurfaceFormat(VkSurfaceKHR surface) +{ 
+ VulkanDevice& dev = VulkanDevice::GetInstance(); + u32 format_count; + VkResult res = vkGetPhysicalDeviceSurfaceFormatsKHR(dev.GetVulkanPhysicalDevice(), surface, &format_count, nullptr); + if (res != VK_SUCCESS || format_count == 0) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceFormatsKHR failed: "); + return std::nullopt; + } + + std::vector surface_formats(format_count); + res = + vkGetPhysicalDeviceSurfaceFormatsKHR(dev.GetVulkanPhysicalDevice(), surface, &format_count, surface_formats.data()); + Assert(res == VK_SUCCESS); + + // If there is a single undefined surface format, the device doesn't care, so we'll just use RGBA + if (surface_formats[0].format == VK_FORMAT_UNDEFINED) + return VkSurfaceFormatKHR{VK_FORMAT_R8G8B8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; + + // Try to find a suitable format. + for (const VkSurfaceFormatKHR& surface_format : surface_formats) + { + // Some drivers seem to return a SRGB format here (Intel Mesa). + // This results in gamma correction when presenting to the screen, which we don't want. + // Use a linear format instead, if this is the case. 
+ return VkSurfaceFormatKHR{GetLinearFormat(surface_format.format), VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; + } + + Log_ErrorPrintf("Failed to find a suitable format for swap chain buffers."); + return std::nullopt; +} + +static const char* PresentModeToString(VkPresentModeKHR mode) +{ + switch (mode) + { + case VK_PRESENT_MODE_IMMEDIATE_KHR: + return "VK_PRESENT_MODE_IMMEDIATE_KHR"; + + case VK_PRESENT_MODE_MAILBOX_KHR: + return "VK_PRESENT_MODE_MAILBOX_KHR"; + + case VK_PRESENT_MODE_FIFO_KHR: + return "VK_PRESENT_MODE_FIFO_KHR"; + + case VK_PRESENT_MODE_FIFO_RELAXED_KHR: + return "VK_PRESENT_MODE_FIFO_RELAXED_KHR"; + + case VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR: + return "VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR"; + + case VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR: + return "VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR"; + + default: + return "UNKNOWN_VK_PRESENT_MODE"; + } +} + +static VkPresentModeKHR GetPreferredPresentModeForVsyncMode(bool mode) +{ + if (mode /*== VsyncMode::On*/) + return VK_PRESENT_MODE_FIFO_KHR; + /*else if (mode == VsyncMode::Adaptive) + return VK_PRESENT_MODE_FIFO_RELAXED_KHR;*/ + else + return VK_PRESENT_MODE_IMMEDIATE_KHR; +} + +std::optional VulkanSwapChain::SelectPresentMode(VkSurfaceKHR surface, bool vsync) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + VkResult res; + u32 mode_count; + res = vkGetPhysicalDeviceSurfacePresentModesKHR(dev.GetVulkanPhysicalDevice(), surface, &mode_count, nullptr); + if (res != VK_SUCCESS || mode_count == 0) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceFormatsKHR failed: "); + return std::nullopt; + } + + std::vector present_modes(mode_count); + res = vkGetPhysicalDeviceSurfacePresentModesKHR(dev.GetVulkanPhysicalDevice(), surface, &mode_count, + present_modes.data()); + Assert(res == VK_SUCCESS); + + // Checks if a particular mode is supported, if it is, returns that mode. 
+ auto CheckForMode = [&present_modes](VkPresentModeKHR check_mode) { + auto it = std::find_if(present_modes.begin(), present_modes.end(), + [check_mode](VkPresentModeKHR mode) { return check_mode == mode; }); + return it != present_modes.end(); + }; + + // Use preferred mode if available. + const VkPresentModeKHR preferred_mode = GetPreferredPresentModeForVsyncMode(vsync); + VkPresentModeKHR selected_mode; + if (CheckForMode(preferred_mode)) + { + selected_mode = preferred_mode; + } + else if (!vsync /*vsync != VsyncMode::On*/ && CheckForMode(VK_PRESENT_MODE_MAILBOX_KHR)) + { + // Prefer mailbox over fifo for adaptive vsync/no-vsync. + selected_mode = VK_PRESENT_MODE_MAILBOX_KHR; + } + else if (vsync /*vsync != VsyncMode::Off*/ && CheckForMode(VK_PRESENT_MODE_FIFO_KHR)) + { + // Fallback to FIFO if we're using any kind of vsync. + // This should never fail, FIFO is mandated. + selected_mode = VK_PRESENT_MODE_FIFO_KHR; + } + else + { + // Fall back to whatever is available. + selected_mode = present_modes[0]; + } + + Log_DevPrintf("(SwapChain) Preferred present mode: %s, selected: %s", PresentModeToString(preferred_mode), + PresentModeToString(selected_mode)); + + return selected_mode; +} + +bool VulkanSwapChain::CreateSwapChain() +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + + // Select swap chain format and present mode + std::optional surface_format = SelectSurfaceFormat(m_surface); + std::optional present_mode = SelectPresentMode(m_surface, m_vsync_mode); + if (!surface_format.has_value() || !present_mode.has_value()) + return false; + + // Look up surface properties to determine image count and dimensions + VkSurfaceCapabilitiesKHR surface_capabilities; + VkResult res = + vkGetPhysicalDeviceSurfaceCapabilitiesKHR(dev.GetVulkanPhysicalDevice(), m_surface, &surface_capabilities); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceCapabilitiesKHR failed: "); + return false; + } + + // Select number of images in swap chain, 
we prefer one buffer in the background to work on + u32 image_count = std::max(surface_capabilities.minImageCount + 1u, 2u); + + // maxImageCount can be zero, in which case there isn't an upper limit on the number of buffers. + if (surface_capabilities.maxImageCount > 0) + image_count = std::min(image_count, surface_capabilities.maxImageCount); + + // Determine the dimensions of the swap chain. Values of -1 indicate the size we specify here + // determines window size? + VkExtent2D size = surface_capabilities.currentExtent; + if (size.width == UINT32_MAX) + { + size.width = m_window_info.surface_width; + size.height = m_window_info.surface_height; + } + size.width = + std::clamp(size.width, surface_capabilities.minImageExtent.width, surface_capabilities.maxImageExtent.width); + size.height = + std::clamp(size.height, surface_capabilities.minImageExtent.height, surface_capabilities.maxImageExtent.height); + + // Prefer identity transform if possible + VkSurfaceTransformFlagBitsKHR transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + if (!(surface_capabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)) + transform = surface_capabilities.currentTransform; + + VkCompositeAlphaFlagBitsKHR alpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + if (!(surface_capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR)) + { + // If we only support pre-multiplied/post-multiplied... 
:/ + if (surface_capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) + alpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; + } + + // Select swap chain flags, we only need a colour attachment + VkImageUsageFlags image_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + if ((surface_capabilities.supportedUsageFlags & image_usage) != image_usage) + { + Log_ErrorPrintf("Vulkan: Swap chain does not support usage as color attachment"); + return false; + } + + // Store the old/current swap chain when recreating for resize + // Old swap chain is destroyed regardless of whether the create call succeeds + VkSwapchainKHR old_swap_chain = m_swap_chain; + m_swap_chain = VK_NULL_HANDLE; + + // Now we can actually create the swap chain + VkSwapchainCreateInfoKHR swap_chain_info = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + nullptr, + 0, + m_surface, + image_count, + surface_format->format, + surface_format->colorSpace, + size, + 1u, + image_usage, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr, + transform, + alpha, + present_mode.value(), + VK_TRUE, + old_swap_chain}; + std::array indices = {{ + dev.GetGraphicsQueueFamilyIndex(), + dev.GetPresentQueueFamilyIndex(), + }}; + if (dev.GetGraphicsQueueFamilyIndex() != dev.GetPresentQueueFamilyIndex()) + { + swap_chain_info.imageSharingMode = VK_SHARING_MODE_CONCURRENT; + swap_chain_info.queueFamilyIndexCount = 2; + swap_chain_info.pQueueFamilyIndices = indices.data(); + } + +#ifdef _WIN32 + VkSurfaceFullScreenExclusiveInfoEXT exclusive_info = {VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_INFO_EXT}; + VkSurfaceFullScreenExclusiveWin32InfoEXT exclusive_win32_info = { + VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_WIN32_INFO_EXT}; + if (m_exclusive_fullscreen_control.has_value()) + { + if (dev.GetOptionalExtensions().vk_ext_full_screen_exclusive) + { + exclusive_info.fullScreenExclusive = + (m_exclusive_fullscreen_control.value() ? 
VK_FULL_SCREEN_EXCLUSIVE_ALLOWED_EXT : + VK_FULL_SCREEN_EXCLUSIVE_DISALLOWED_EXT); + + exclusive_win32_info.hmonitor = + MonitorFromWindow(reinterpret_cast(m_window_info.window_handle), MONITOR_DEFAULTTONEAREST); + if (!exclusive_win32_info.hmonitor) + Log_ErrorPrintf("MonitorFromWindow() for exclusive fullscreen exclusive override failed."); + + Vulkan::AddPointerToChain(&swap_chain_info, &exclusive_info); + Vulkan::AddPointerToChain(&swap_chain_info, &exclusive_win32_info); + } + else + { + Log_ErrorPrintf("Exclusive fullscreen control requested, but VK_EXT_full_screen_exclusive is not supported."); + } + } +#else + if (m_exclusive_fullscreen_control.has_value()) + Log_ErrorPrintf("Exclusive fullscreen control requested, but is not supported on this platform."); +#endif + + res = vkCreateSwapchainKHR(dev.GetVulkanDevice(), &swap_chain_info, nullptr, &m_swap_chain); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSwapchainKHR failed: "); + return false; + } + + // Now destroy the old swap chain, since it's been recreated. + // We can do this immediately since all work should have been completed before calling resize. + if (old_swap_chain != VK_NULL_HANDLE) + vkDestroySwapchainKHR(dev.GetVulkanDevice(), old_swap_chain, nullptr); + + m_format = surface_format->format; + m_window_info.surface_width = std::max(1u, size.width); + m_window_info.surface_height = std::max(1u, size.height); + m_window_info.surface_format = VulkanDevice::GetFormatForVkFormat(surface_format->format); + if (m_window_info.surface_format == GPUTexture::Format::Unknown) + { + Log_ErrorPrintf("Unknown Vulkan surface format %u", static_cast(surface_format->format)); + return false; + } + + // Get and create images. 
+ Assert(m_images.empty()); + + res = vkGetSwapchainImagesKHR(dev.GetVulkanDevice(), m_swap_chain, &image_count, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetSwapchainImagesKHR failed: "); + return false; + } + + std::vector images(image_count); + res = vkGetSwapchainImagesKHR(dev.GetVulkanDevice(), m_swap_chain, &image_count, images.data()); + Assert(res == VK_SUCCESS); + + VkRenderPass render_pass = + dev.GetRenderPass(m_format, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); + if (render_pass == VK_NULL_HANDLE) + return false; + + Vulkan::FramebufferBuilder fbb; + m_images.reserve(image_count); + m_current_image = 0; + for (u32 i = 0; i < image_count; i++) + { + Image image = {}; + image.image = images[i]; + + const VkImageViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + images[i], + VK_IMAGE_VIEW_TYPE_2D, + m_format, + {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}, + }; + if ((res = vkCreateImageView(dev.GetVulkanDevice(), &view_info, nullptr, &image.view)) != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView() failed: "); + return false; + } + + fbb.AddAttachment(image.view); + fbb.SetRenderPass(render_pass); + fbb.SetSize(size.width, size.height, 1); + if ((image.framebuffer = fbb.Create(dev.GetVulkanDevice())) == VK_NULL_HANDLE) + { + vkDestroyImageView(dev.GetVulkanDevice(), image.view, nullptr); + return false; + } + + m_images.push_back(image); + } + + m_semaphores.reserve(image_count); + m_current_semaphore = (image_count - 1); + for (u32 i = 0; i < image_count; i++) + { + ImageSemaphores sema; + + const VkSemaphoreCreateInfo semaphore_info = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0}; + res = vkCreateSemaphore(dev.GetVulkanDevice(), &semaphore_info, nullptr, &sema.available_semaphore); + if (res != VK_SUCCESS) + { + 
LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + + res = vkCreateSemaphore(dev.GetVulkanDevice(), &semaphore_info, nullptr, &sema.rendering_finished_semaphore); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + vkDestroySemaphore(dev.GetVulkanDevice(), sema.available_semaphore, nullptr); + return false; + } + + m_semaphores.push_back(sema); + } + + return true; +} + +void VulkanSwapChain::DestroySwapChainImages() +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + for (const auto& it : m_images) + { + // don't defer view destruction, images are no longer valid + vkDestroyFramebuffer(dev.GetVulkanDevice(), it.framebuffer, nullptr); + vkDestroyImageView(dev.GetVulkanDevice(), it.view, nullptr); + } + m_images.clear(); + for (auto& it : m_semaphores) + { + vkDestroySemaphore(dev.GetVulkanDevice(), it.rendering_finished_semaphore, nullptr); + vkDestroySemaphore(dev.GetVulkanDevice(), it.available_semaphore, nullptr); + } + m_semaphores.clear(); + + m_image_acquire_result.reset(); +} + +void VulkanSwapChain::DestroySwapChain() +{ + DestroySwapChainImages(); + + if (m_swap_chain == VK_NULL_HANDLE) + return; + + vkDestroySwapchainKHR(VulkanDevice::GetInstance().GetVulkanDevice(), m_swap_chain, nullptr); + m_swap_chain = VK_NULL_HANDLE; + m_window_info.surface_width = 0; + m_window_info.surface_height = 0; +} + +VkResult VulkanSwapChain::AcquireNextImage() +{ + if (m_image_acquire_result.has_value()) + return m_image_acquire_result.value(); + + if (!m_swap_chain) + return VK_ERROR_SURFACE_LOST_KHR; + + const VkResult res = + vkAcquireNextImageKHR(VulkanDevice::GetInstance().GetVulkanDevice(), m_swap_chain, UINT64_MAX, + m_semaphores[m_current_semaphore].available_semaphore, VK_NULL_HANDLE, &m_current_image); + m_image_acquire_result = res; + return res; +} + +void VulkanSwapChain::ReleaseCurrentImage() +{ + m_image_acquire_result.reset(); +} + +bool VulkanSwapChain::ResizeSwapChain(u32 new_width, u32 
new_height, float new_scale) +{ + DestroySwapChainImages(); + + if (new_width != 0 && new_height != 0) + { + m_window_info.surface_width = new_width; + m_window_info.surface_height = new_height; + } + + m_window_info.surface_scale = new_scale; + + if (!CreateSwapChain()) + { + DestroySwapChain(); + return false; + } + + return true; +} + +bool VulkanSwapChain::SetVSync(bool mode) +{ + if (m_vsync_mode == mode) + return true; + + m_vsync_mode = mode; + + // Recreate the swap chain with the new present mode. + Log_VerbosePrintf("Recreating swap chain to change present mode."); + DestroySwapChainImages(); + if (!CreateSwapChain()) + { + DestroySwapChain(); + return false; + } + + return true; +} + +bool VulkanSwapChain::RecreateSurface(const WindowInfo& new_wi) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + + // Destroy the old swap chain, images, and surface. + DestroySwapChain(); + DestroySurface(); + + // Re-create the surface with the new native handle + m_window_info = new_wi; + m_surface = CreateVulkanSurface(dev.GetVulkanInstance(), dev.GetVulkanPhysicalDevice(), &m_window_info); + if (m_surface == VK_NULL_HANDLE) + return false; + + // The validation layers get angry at us if we don't call this before creating the swapchain. 
+ VkBool32 present_supported = VK_TRUE; + VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(dev.GetVulkanPhysicalDevice(), dev.GetPresentQueueFamilyIndex(), + m_surface, &present_supported); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); + return false; + } + if (!present_supported) + { + Panic("Recreated surface does not support presenting."); + return false; + } + + // Finally re-create the swap chain + if (!CreateSwapChain()) + { + DestroySwapChain(); + return false; + } + + return true; +} + +void VulkanSwapChain::DestroySurface() +{ + if (m_surface == VK_NULL_HANDLE) + return; + + DestroyVulkanSurface(VulkanDevice::GetInstance().GetVulkanInstance(), &m_window_info, m_surface); + m_surface = VK_NULL_HANDLE; +} diff --git a/src/util/vulkan_swap_chain.h b/src/util/vulkan_swap_chain.h new file mode 100644 index 000000000..e4090989f --- /dev/null +++ b/src/util/vulkan_swap_chain.h @@ -0,0 +1,117 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "vulkan_loader.h" +#include "vulkan_texture.h" +#include "window_info.h" + +#include "common/types.h" + +#include +#include +#include + +class VulkanSwapChain +{ +public: + ~VulkanSwapChain(); + + // Creates a vulkan-renderable surface for the specified window handle. + static VkSurfaceKHR CreateVulkanSurface(VkInstance instance, VkPhysicalDevice physical_device, WindowInfo* wi); + + // Destroys a previously-created surface. + static void DestroyVulkanSurface(VkInstance instance, WindowInfo* wi, VkSurfaceKHR surface); + + // Create a new swap chain from a pre-existing surface. 
+ static std::unique_ptr Create(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync, + std::optional exclusive_fullscreen_control); + + ALWAYS_INLINE VkSurfaceKHR GetSurface() const { return m_surface; } + ALWAYS_INLINE VkSwapchainKHR GetSwapChain() const { return m_swap_chain; } + ALWAYS_INLINE const VkSwapchainKHR* GetSwapChainPtr() const { return &m_swap_chain; } + ALWAYS_INLINE const WindowInfo& GetWindowInfo() const { return m_window_info; } + ALWAYS_INLINE u32 GetWidth() const { return m_window_info.surface_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_window_info.surface_height; } + ALWAYS_INLINE float GetScale() const { return m_window_info.surface_scale; } + ALWAYS_INLINE u32 GetCurrentImageIndex() const { return m_current_image; } + ALWAYS_INLINE const u32* GetCurrentImageIndexPtr() const { return &m_current_image; } + ALWAYS_INLINE u32 GetImageCount() const { return static_cast(m_images.size()); } + ALWAYS_INLINE VkFormat GetImageFormat() const { return m_format; } + ALWAYS_INLINE VkImage GetCurrentImage() const { return m_images[m_current_image].image; } + ALWAYS_INLINE VkFramebuffer GetCurrentFramebuffer() const { return m_images[m_current_image].framebuffer; } + ALWAYS_INLINE VkSemaphore GetImageAvailableSemaphore() const + { + return m_semaphores[m_current_semaphore].available_semaphore; + } + ALWAYS_INLINE const VkSemaphore* GetImageAvailableSemaphorePtr() const + { + return &m_semaphores[m_current_semaphore].available_semaphore; + } + ALWAYS_INLINE VkSemaphore GetRenderingFinishedSemaphore() const + { + return m_semaphores[m_current_semaphore].rendering_finished_semaphore; + } + ALWAYS_INLINE const VkSemaphore* GetRenderingFinishedSemaphorePtr() const + { + return &m_semaphores[m_current_semaphore].rendering_finished_semaphore; + } + + // Returns true if the current present mode is synchronizing (adaptive or hard). 
+ ALWAYS_INLINE bool IsPresentModeSynchronizing() const { return (m_vsync_mode /*!= VsyncMode::Off*/); } + + VkRenderPass GetRenderPass(VkAttachmentLoadOp load_op) const; + VkResult AcquireNextImage(); + void ReleaseCurrentImage(); + + bool RecreateSurface(const WindowInfo& new_wi); + bool ResizeSwapChain(u32 new_width = 0, u32 new_height = 0, float new_scale = 1.0f); + + // Change vsync enabled state. This may fail as it causes a swapchain recreation. + bool SetVSync(bool mode); + +private: + VulkanSwapChain(const WindowInfo& wi, VkSurfaceKHR surface, bool vsync, + std::optional exclusive_fullscreen_control); + + static std::optional SelectSurfaceFormat(VkSurfaceKHR surface); + static std::optional SelectPresentMode(VkSurfaceKHR surface, bool vsync); + + bool CreateSwapChain(); + void DestroySwapChain(); + + bool SetupSwapChainImages(); + void DestroySwapChainImages(); + + void DestroySurface(); + + struct Image + { + VkImage image; + VkImageView view; + VkFramebuffer framebuffer; + }; + + struct ImageSemaphores + { + VkSemaphore available_semaphore; + VkSemaphore rendering_finished_semaphore; + }; + + WindowInfo m_window_info; + + VkSurfaceKHR m_surface = VK_NULL_HANDLE; + VkSwapchainKHR m_swap_chain = VK_NULL_HANDLE; + + std::vector m_images; + std::vector m_semaphores; + + VkFormat m_format = VK_FORMAT_UNDEFINED; + bool m_vsync_mode = false; + u32 m_current_image = 0; + u32 m_current_semaphore = 0; + + std::optional m_image_acquire_result; + std::optional m_exclusive_fullscreen_control; +}; diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp new file mode 100644 index 000000000..e557cafa3 --- /dev/null +++ b/src/util/vulkan_texture.cpp @@ -0,0 +1,1109 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "vulkan_texture.h" +#include "vulkan_builders.h" +#include "vulkan_device.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/bitutils.h" 
+#include "common/log.h" + +Log_SetChannel(VulkanDevice); + +static constexpr const VkComponentMapping s_identity_swizzle{ + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}; + +static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout) +{ + static constexpr std::array(VulkanTexture::Layout::Count)> s_vk_layout_mapping = {{ + VK_IMAGE_LAYOUT_UNDEFINED, // Undefined + VK_IMAGE_LAYOUT_PREINITIALIZED, // Preinitialized + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // ColorAttachment + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // DepthStencilAttachment + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // ShaderReadOnly + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // ClearDst + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TransferSrc + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TransferDst + VK_IMAGE_LAYOUT_GENERAL, // TransferSelf + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, // PresentSrc + VK_IMAGE_LAYOUT_GENERAL, // FeedbackLoop + VK_IMAGE_LAYOUT_GENERAL, // ReadWriteImage + VK_IMAGE_LAYOUT_GENERAL, // ComputeReadWriteImage + VK_IMAGE_LAYOUT_GENERAL, // General + }}; + return (layout == VulkanTexture::Layout::FeedbackLoop && VulkanDevice::GetInstance().UseFeedbackLoopLayout()) ? + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : + s_vk_layout_mapping[static_cast(layout)]; +} + +static VkAccessFlagBits GetFeedbackLoopInputAccessBits() +{ + return VulkanDevice::GetInstance().UseFeedbackLoopLayout() ? 
VK_ACCESS_SHADER_READ_BIT : + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; +} + +VulkanTexture::VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, + VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format) + : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), + static_cast(samples), type, format), + m_image(image), m_allocation(allocation), m_view(view), m_vk_format(vk_format) +{ +} + +VulkanTexture::~VulkanTexture() +{ + Destroy(true); +} + +std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + Type type, Format format, VkFormat vk_format) +{ + if (!ValidateConfig(width, height, layers, levels, samples, type, format)) + return {}; + + VulkanDevice& dev = VulkanDevice::GetInstance(); + + VkImageCreateInfo ici = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + nullptr, + 0, + VK_IMAGE_TYPE_2D, + vk_format, + {static_cast(width), static_cast(height), 1}, + static_cast(levels), + 1, + static_cast(samples), + VK_IMAGE_TILING_OPTIMAL}; + + VmaAllocationCreateInfo aci = {}; + aci.usage = VMA_MEMORY_USAGE_GPU_ONLY; + aci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT; + aci.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + VkImageViewCreateInfo vci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + VK_NULL_HANDLE, + VK_IMAGE_VIEW_TYPE_2D, + vk_format, + s_identity_swizzle, + {VK_IMAGE_ASPECT_COLOR_BIT, 0, static_cast(levels), 0, 1}}; + + // TODO: Don't need the feedback loop stuff yet. + switch (type) + { + case Type::Texture: + { + ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + } + break; + + case Type::RenderTarget: + { + DebugAssert(levels == 1); + ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + (dev.UseFeedbackLoopLayout() ? 
VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT : + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); + } + break; + + case Type::DepthStencil: + { + DebugAssert(levels == 1); + ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + (dev.UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT : 0); + vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + } + break; + + case Type::RWTexture: + { + DebugAssert(levels == 1); + ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + } + break; + + default: + return {}; + } + + // Use dedicated allocations for typical RT size + if ((type == Type::RenderTarget || type == Type::DepthStencil) && width >= 512 && height >= 448) + aci.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + + VkImage image = VK_NULL_HANDLE; + VmaAllocation allocation = VK_NULL_HANDLE; + VkResult res = vmaCreateImage(dev.GetAllocator(), &ici, &aci, &image, &allocation, nullptr); + if (aci.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT && res != VK_SUCCESS) + { + // try without dedicated allocation + aci.flags &= ~VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + res = vmaCreateImage(dev.GetAllocator(), &ici, &aci, &image, &allocation, nullptr); + } + if (res == VK_ERROR_OUT_OF_DEVICE_MEMORY) + { + Log_ErrorPrintf("Failed to allocate device memory for %ux%u texture", width, height); + return {}; + } + else if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vmaCreateImage failed: "); + return {}; + } + + VkImageView view = VK_NULL_HANDLE; + vci.image = image; + res = vkCreateImageView(dev.GetVulkanDevice(), &vci, nullptr, &view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); + vmaDestroyImage(dev.GetAllocator(), image, allocation); + return {}; + } + + return std::unique_ptr( + new VulkanTexture(width, height, 
layers, levels, samples, type, format, image, allocation, view, vk_format)); +} + +void VulkanTexture::Destroy(bool defer) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + dev.UnbindTexture(this); + if (defer) + { + for (auto& it : m_descriptor_sets) + dev.DeferPersistentDescriptorSetDestruction(it.second); + } + else + { + for (auto& it : m_descriptor_sets) + dev.FreePersistentDescriptorSet(it.second); + } + m_descriptor_sets.clear(); + + if (m_view != VK_NULL_HANDLE) + { + if (defer) + VulkanDevice::GetInstance().DeferImageViewDestruction(m_view); + else + vkDestroyImageView(VulkanDevice::GetInstance().GetVulkanDevice(), m_view, nullptr); + m_view = VK_NULL_HANDLE; + } + + // If we don't have device memory allocated, the image is not owned by us (e.g. swapchain) + if (m_allocation != VK_NULL_HANDLE) + { + if (defer) + VulkanDevice::GetInstance().DeferImageDestruction(m_image, m_allocation); + else + vmaDestroyImage(VulkanDevice::GetInstance().GetAllocator(), m_image, m_allocation); + m_image = VK_NULL_HANDLE; + m_allocation = VK_NULL_HANDLE; + } +} + +VkImageLayout VulkanTexture::GetVkLayout() const +{ + return GetVkImageLayout(m_layout); +} + +VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate() +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + if (m_type != Type::Texture || m_use_fence_counter == dev.GetCurrentFenceCounter()) + { + // Console.WriteLn("Texture update within frame, can't use do beforehand"); + dev.EndRenderPass(); + return dev.GetCurrentCommandBuffer(); + } + + return dev.GetCurrentInitCommandBuffer(); +} + +void VulkanTexture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, + u32 upload_pitch) const +{ + StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height); +} + +VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, + u32 height) const +{ + const u32 size = upload_pitch * height; + const 
VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + nullptr, + 0, + static_cast(size), + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr}; + + // Don't worry about setting the coherent bit for this upload, the main reason we had + // that set in StreamBuffer was for MoltenVK, which would upload the whole buffer on + // smaller uploads, but we're writing to the whole thing anyway. + VmaAllocationCreateInfo aci = {}; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + + VmaAllocationInfo ai; + VkBuffer buffer; + VmaAllocation allocation; + VkResult res = vmaCreateBuffer(VulkanDevice::GetInstance().GetAllocator(), &bci, &aci, &buffer, &allocation, &ai); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "(AllocateUploadStagingBuffer) vmaCreateBuffer() failed: "); + return VK_NULL_HANDLE; + } + + // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy. + VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation); + + // And write the data. 
+ CopyTextureDataForUpload(ai.pMappedData, data, width, height, pitch, upload_pitch); + vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, size); + return buffer; +} + +void VulkanTexture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, + u32 pitch, VkBuffer buffer, u32 buffer_offset) +{ + const Layout old_layout = m_layout; + if (old_layout != Layout::TransferDst) + TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst); + + const u32 row_length = pitch / GetPixelSize(); + + const VkBufferImageCopy bic = {static_cast(buffer_offset), + row_length, + height, + {VK_IMAGE_ASPECT_COLOR_BIT, static_cast(level), 0u, 1u}, + {static_cast(x), static_cast(y), 0}, + {width, height, 1u}}; + + vkCmdCopyBufferToImage(cmdbuf, buffer, m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bic); + + if (old_layout != Layout::TransferDst) + TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, Layout::TransferDst, old_layout); +} + +bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer, u32 level) +{ + DebugAssert(layer < m_layers && level < m_levels); + DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level)); + + const u32 upload_pitch = Common::AlignUpPow2(pitch, VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment()); + const u32 required_size = height * upload_pitch; + VulkanDevice& dev = VulkanDevice::GetInstance(); + VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); + + // If the texture is larger than half our streaming buffer size, use a separate buffer. + // Otherwise allocation will either fail, or require lots of cmdbuffer submissions. 
+ VkBuffer buffer; + u32 buffer_offset; + if (required_size > (sbuffer.GetCurrentSize() / 2)) + { + buffer_offset = 0; + buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); + if (buffer == VK_NULL_HANDLE) + return false; + } + else + { + if (!sbuffer.ReserveMemory(required_size, dev.GetBufferCopyOffsetAlignment())) + { + dev.SubmitCommandBuffer(false, "While waiting for %u bytes in texture upload buffer", required_size); + if (!sbuffer.ReserveMemory(required_size, dev.GetBufferCopyOffsetAlignment())) + { + Log_ErrorPrintf("Failed to reserve texture upload memory (%u bytes).", required_size); + return false; + } + } + + buffer = sbuffer.GetBuffer(); + buffer_offset = sbuffer.GetCurrentOffset(); + CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); + sbuffer.CommitMemory(required_size); + } + + const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); + + // if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear + if (m_type == Type::RenderTarget) + { + if (x != 0 || y != 0 || width != m_width || height != m_height) + CommitClear(cmdbuf); + else + m_state = State::Dirty; + } + + // first time the texture is used? don't leave it undefined + if (m_layout == Layout::Undefined) + TransitionToLayout(cmdbuf, Layout::TransferDst); + + UpdateFromBuffer(cmdbuf, x, y, width, height, layer, level, upload_pitch, buffer, buffer_offset); + TransitionToLayout(cmdbuf, Layout::ShaderReadOnly); + return true; +} + +bool VulkanTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level) +{ + // TODO: linear textures for dynamic? 
+ if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) + { + return false; + } + + VulkanDevice& dev = VulkanDevice::GetInstance(); + if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) + CommitClear(GetCommandBufferForUpdate()); + + // see note in Update() for the reason why. + const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); + const u32 req_size = height * aligned_pitch; + VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer(); + if (req_size >= (buffer.GetCurrentSize() / 2)) + return false; + + if (!buffer.ReserveMemory(req_size, dev.GetBufferCopyOffsetAlignment())) + { + dev.SubmitCommandBuffer(false, "While waiting for %u bytes in texture upload buffer", req_size); + if (!buffer.ReserveMemory(req_size, dev.GetBufferCopyOffsetAlignment())) + Panic("Failed to reserve texture upload memory"); + } + + // map for writing + *map = buffer.GetCurrentHostPointer(); + *map_stride = aligned_pitch; + m_map_x = static_cast(x); + m_map_y = static_cast(y); + m_map_width = static_cast(width); + m_map_height = static_cast(height); + m_map_layer = static_cast(layer); + m_map_level = static_cast(level); + m_state = GPUTexture::State::Dirty; + return true; +} + +void VulkanTexture::Unmap() +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer(); + const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); + const u32 req_size = m_map_height * aligned_pitch; + const u32 offset = sb.GetCurrentOffset(); + sb.CommitMemory(req_size); + + // first time the texture is used? 
don't leave it undefined + const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); + if (m_layout == Layout::Undefined) + TransitionToLayout(cmdbuf, Layout::TransferDst); + + UpdateFromBuffer(cmdbuf, m_map_x, m_map_y, m_map_width, m_map_height, m_map_layer, m_map_level, aligned_pitch, + sb.GetBuffer(), offset); + TransitionToLayout(cmdbuf, Layout::ShaderReadOnly); + + m_map_x = 0; + m_map_y = 0; + m_map_width = 0; + m_map_height = 0; + m_map_layer = 0; + m_map_level = 0; +} + +void VulkanTexture::CommitClear() +{ + if (m_state != GPUTexture::State::Cleared) + return; + + VulkanDevice& dev = VulkanDevice::GetInstance(); + dev.EndRenderPass(); + + CommitClear(dev.GetCurrentCommandBuffer()); +} + +void VulkanTexture::CommitClear(VkCommandBuffer cmdbuf) +{ + TransitionToLayout(cmdbuf, Layout::ClearDst); + + if (IsDepthStencil()) + { + const VkClearDepthStencilValue cv = {m_clear_value.depth}; + const VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u}; + vkCmdClearDepthStencilImage(cmdbuf, m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &cv, 1, &srr); + } + else + { + alignas(16) VkClearColorValue cv; + std::memcpy(cv.float32, GetUNormClearColor().data(), sizeof(cv.float32)); + const VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; + vkCmdClearColorImage(cmdbuf, m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &cv, 1, &srr); + } + + SetState(GPUTexture::State::Dirty); +} + +void VulkanTexture::OverrideImageLayout(Layout new_layout) +{ + m_layout = new_layout; +} + +void VulkanTexture::SetDebugName(const std::string_view& name) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + Vulkan::SetObjectName(dev.GetVulkanDevice(), m_image, name); + Vulkan::SetObjectName(dev.GetVulkanDevice(), m_view, name); +} + +void VulkanTexture::TransitionToLayout(Layout layout) +{ + TransitionToLayout(VulkanDevice::GetInstance().GetCurrentCommandBuffer(), layout); +} + +void VulkanTexture::TransitionToLayout(VkCommandBuffer 
command_buffer, Layout new_layout) +{ + if (m_layout == new_layout) + return; + + TransitionSubresourcesToLayout(command_buffer, 0, m_layers, 0, m_levels, m_layout, new_layout); + + m_layout = new_layout; +} + +void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_layer, u32 num_layers, + u32 start_level, u32 num_levels, Layout old_layout, + Layout new_layout) +{ + TransitionSubresourcesToLayout(command_buffer, m_image, m_type, start_layer, num_layers, start_level, num_levels, + old_layout, new_layout); +} + +void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, + u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels, + Layout old_layout, Layout new_layout) +{ + VkImageAspectFlags aspect; + if (type == Type::DepthStencil) + { + // TODO: detect stencil + aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + } + else + { + aspect = VK_IMAGE_ASPECT_COLOR_BIT; + } + + VkImageMemoryBarrier barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + 0, + 0, + GetVkImageLayout(old_layout), + GetVkImageLayout(new_layout), + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + image, + {aspect, start_level, num_levels, start_layer, num_layers}}; + + // srcStageMask -> Stages that must complete before the barrier + // dstStageMask -> Stages that must wait for after the barrier before beginning + VkPipelineStageFlags srcStageMask, dstStageMask; + switch (old_layout) + { + case Layout::Undefined: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + barrier.srcAccessMask = 0; + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case Layout::Preinitialized: + // Image has been pre-initialized by the host, so ensure all writes have completed. 
+ barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; + break; + + case Layout::ColorAttachment: + // Image was being used as a color attachment, so ensure all writes have completed. + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case Layout::DepthStencilAttachment: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case Layout::ShaderReadOnly: + // Image was being used as a shader resource, make sure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case Layout::ClearDst: + // Image was being used as a clear destination, ensure all writes have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::TransferSrc: + // Image was being used as a copy source, ensure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::TransferDst: + // Image was being used as a copy destination, ensure all writes have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::TransferSelf: + // Image was being used as a copy source and destination, ensure all reads and writes have finished. 
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::FeedbackLoop: + barrier.srcAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? + (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + GetFeedbackLoopInputAccessBits()) : + (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT); + srcStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? + (VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) : + (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + break; + + case Layout::ReadWriteImage: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case Layout::ComputeReadWriteImage: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + + case Layout::General: + default: + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + + switch (new_layout) + { + case Layout::Undefined: + barrier.dstAccessMask = 0; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case Layout::ColorAttachment: + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case Layout::DepthStencilAttachment: + barrier.dstAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case Layout::ShaderReadOnly: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + dstStageMask = 
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case Layout::ClearDst: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::TransferSrc: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::TransferDst: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::TransferSelf: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case Layout::PresentSrc: + srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case Layout::FeedbackLoop: + barrier.dstAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? + (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + GetFeedbackLoopInputAccessBits()) : + (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT); + dstStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? 
+ (VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) : + (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + break; + + case Layout::ReadWriteImage: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case Layout::ComputeReadWriteImage: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + + case Layout::General: + default: + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + } + vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, &barrier); +} + +VkDescriptorSet VulkanTexture::GetDescriptorSetWithSampler(VkSampler sampler) +{ + for (const auto& it : m_descriptor_sets) + { + if (it.first == sampler) + return it.second; + } + + VulkanDevice& dev = VulkanDevice::GetInstance(); + VkDescriptorSet ds = dev.AllocatePersistentDescriptorSet(dev.m_single_texture_ds_layout); + if (ds == VK_NULL_HANDLE) + Panic("Failed to allocate persistent descriptor set."); + + Vulkan::DescriptorSetUpdateBuilder dsub; + dsub.AddCombinedImageSamplerDescriptorWrite(ds, 0, m_view, sampler); + dsub.Update(dev.GetVulkanDevice(), false); + m_descriptor_sets.emplace_back(sampler, ds); + return ds; +} + +void VulkanTexture::MakeReadyForSampling() +{ + if (m_layout == Layout::ShaderReadOnly) + return; + + VulkanDevice& dev = VulkanDevice::GetInstance(); + if (dev.InRenderPass()) + dev.EndRenderPass(); + + TransitionToLayout(Layout::ShaderReadOnly); +} + +std::unique_ptr VulkanDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data /* = nullptr */, u32 data_stride /* = 0 */, + bool dynamic /* = false */) +{ + const VkFormat vk_format = 
VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast(format)]; + std::unique_ptr tex = + VulkanTexture::Create(width, height, layers, levels, samples, type, format, vk_format); + if (tex && data) + tex->Update(0, 0, width, height, data, data_stride); + + return tex; +} + +bool VulkanDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) +{ + VulkanTexture* T = static_cast(texture); + T->CommitClear(); + + const u32 pitch = Common::AlignUp(width * T->GetPixelSize(), GetBufferCopyRowPitchAlignment()); + const u32 size = pitch * height; + const u32 level = 0; + if (!CheckDownloadBufferSize(size)) + { + Log_ErrorPrintf("Can't read back %ux%u", width, height); + return false; + } + + if (InRenderPass()) + EndRenderPass(); + + const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + + VulkanTexture::Layout old_layout = T->GetLayout(); + if (old_layout != VulkanTexture::Layout::TransferSrc) + T->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, old_layout, VulkanTexture::Layout::TransferSrc); + + VkBufferImageCopy image_copy = {}; + const VkImageAspectFlags aspect = T->IsDepthStencil() ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + image_copy.bufferOffset = 0; + image_copy.bufferRowLength = pitch / T->GetPixelSize(); + image_copy.bufferImageHeight = 0; + image_copy.imageSubresource = {aspect, level, 0u, 1u}; + image_copy.imageOffset = {static_cast(x), static_cast(y), 0}; + image_copy.imageExtent = {width, height, 1u}; + + // do the copy + vkCmdCopyImageToBuffer(cmdbuf, T->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_download_buffer, 1, + &image_copy); + + // flush gpu cache + const VkBufferMemoryBarrier buffer_info = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask + VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_download_buffer, // VkBuffer buffer + 0, // VkDeviceSize offset + size // VkDeviceSize size + }; + vkCmdPipelineBarrier(cmdbuf, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &buffer_info, + 0, nullptr); + + if (old_layout != VulkanTexture::Layout::TransferSrc) + T->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VulkanTexture::Layout::TransferSrc, old_layout); + + SubmitCommandBuffer(true); + + // invalidate cpu cache before reading + VkResult res = vmaInvalidateAllocation(m_allocator, m_download_buffer_allocation, 0, size); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: "); + + StringUtil::StrideMemCpy(out_data, out_data_stride, m_download_buffer_map, pitch, width * T->GetPixelSize(), height); + return true; +} + +bool VulkanDevice::CheckDownloadBufferSize(u32 required_size) +{ + if (m_download_buffer_size >= required_size) + return true; + + DestroyDownloadBuffer(); + + // Adreno has slow coherent cached reads. 
+ const bool is_adreno = (m_device_properties.vendorID == 0x5143 || + m_device_driver_properties.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY); + + const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + nullptr, + 0u, + required_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_SHARING_MODE_EXCLUSIVE, + 0u, + nullptr}; + + VmaAllocationCreateInfo aci = {}; + aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + aci.preferredFlags = is_adreno ? (VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + VmaAllocationInfo ai = {}; + VkResult res = vmaCreateBuffer(m_allocator, &bci, &aci, &m_download_buffer, &m_download_buffer_allocation, &ai); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); + return false; + } + + m_download_buffer_map = static_cast(ai.pMappedData); + return true; +} + +void VulkanDevice::DestroyDownloadBuffer() +{ + if (m_download_buffer == VK_NULL_HANDLE) + return; + + vmaDestroyBuffer(m_allocator, m_download_buffer, m_download_buffer_allocation); + + // unmapped as part of the buffer destroy + m_download_buffer = VK_NULL_HANDLE; + m_download_buffer_allocation = VK_NULL_HANDLE; + m_download_buffer_map = nullptr; + m_download_buffer_size = 0; +} + +VulkanSampler::VulkanSampler(VkSampler sampler) : m_sampler(sampler) +{ +} + +VulkanSampler::~VulkanSampler() +{ + // Cleaned up by main class. 
+} + +void VulkanSampler::SetDebugName(const std::string_view& name) +{ + Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_sampler, name); +} + +VkSampler VulkanDevice::GetSampler(const GPUSampler::Config& config) +{ + const auto it = m_sampler_map.find(config.key); + if (it != m_sampler_map.end()) + return it->second; + + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + VK_SAMPLER_ADDRESS_MODE_REPEAT, // Repeat + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // ClampToEdge + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // ClampToBorder + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> min_mag_filters = {{ + VK_FILTER_NEAREST, // Nearest + VK_FILTER_LINEAR, // Linear + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> mip_filters = {{ + VK_SAMPLER_MIPMAP_MODE_NEAREST, // Nearest + VK_SAMPLER_MIPMAP_MODE_LINEAR, // Linear + }}; + struct BorderColorMapping + { + u32 color; + VkBorderColor vk_color; + }; + static constexpr BorderColorMapping border_color_mapping[] = { + {0x00000000u, VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK}, + {0xFF000000u, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK}, + {0xFFFFFFFFu, VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE}, + }; + + // See https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkSamplerCreateInfo.html#_description + // for the reasoning behind 0.25f here. 
+ VkSamplerCreateInfo ci = { + VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + nullptr, + 0, + min_mag_filters[static_cast(config.min_filter.GetValue())], // min + min_mag_filters[static_cast(config.mag_filter.GetValue())], // mag + mip_filters[static_cast(config.mip_filter.GetValue())], // mip + ta[static_cast(config.address_u.GetValue())], // u + ta[static_cast(config.address_v.GetValue())], // v + ta[static_cast(config.address_w.GetValue())], // w + 0.0f, // lod bias + static_cast(config.anisotropy > 1), // anisotropy enable + static_cast(config.anisotropy), // anisotropy + VK_FALSE, // compare enable + VK_COMPARE_OP_ALWAYS, // compare op + static_cast(config.min_lod), // min lod + (config.max_lod == 0) ? 0.25f : static_cast(config.max_lod), // max lod + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // border + VK_FALSE // unnormalized coordinates + }; + + if (config.address_u == GPUSampler::AddressMode::ClampToBorder || + config.address_v == GPUSampler::AddressMode::ClampToBorder || + config.address_w == GPUSampler::AddressMode::ClampToBorder) + { + u32 i; + for (i = 0; i < static_cast(std::size(border_color_mapping)); i++) + { + if (border_color_mapping[i].color == config.border_color) + break; + } + if (i == std::size(border_color_mapping)) + { + Log_ErrorPrintf("Unsupported border color: %08X", config.border_color.GetValue()); + return {}; + } + + ci.borderColor = border_color_mapping[i].vk_color; + } + + VkSampler sampler = VK_NULL_HANDLE; + VkResult res = vkCreateSampler(m_device, &ci, nullptr, &sampler); + if (res != VK_SUCCESS) + LOG_VULKAN_ERROR(res, "vkCreateSampler() failed: "); + + m_sampler_map.emplace(config.key, sampler); + return sampler; +} + +void VulkanDevice::DestroySamplers() +{ + for (auto& it : m_sampler_map) + { + if (it.second != VK_NULL_HANDLE) + vkDestroySampler(m_device, it.second, nullptr); + } + m_sampler_map.clear(); +} + +std::unique_ptr VulkanDevice::CreateSampler(const GPUSampler::Config& config) +{ + const VkSampler vsampler = 
GetSampler(config); + if (vsampler == VK_NULL_HANDLE) + return {}; + + return std::unique_ptr(new VulkanSampler(vsampler)); +} + +VulkanFramebuffer::VulkanFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, VkFramebuffer fb) + : GPUFramebuffer(rt, ds, width, height), m_framebuffer(fb) +{ +} + +VulkanFramebuffer::~VulkanFramebuffer() +{ + VulkanDevice::GetInstance().DeferFramebufferDestruction(m_framebuffer); +} + +void VulkanFramebuffer::SetDebugName(const std::string_view& name) +{ + Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_framebuffer, name); +} + +std::unique_ptr VulkanDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /*= nullptr*/) +{ + DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); + VulkanTexture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); + VulkanTexture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); + + const u32 width = RT ? RT->GetWidth() : DS->GetWidth(); + const u32 height = RT ? RT->GetHeight() : DS->GetHeight(); + + const VkRenderPass render_pass = + GetRenderPass(RT ? RT->GetVkFormat() : VK_FORMAT_UNDEFINED, DS ? DS->GetVkFormat() : VK_FORMAT_UNDEFINED, + VK_SAMPLE_COUNT_1_BIT, RT ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, + RT ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE, + DS ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, + DS ? 
VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE); + DebugAssert(render_pass != VK_NULL_HANDLE); + + Vulkan::FramebufferBuilder fbb; + fbb.SetRenderPass(render_pass); + fbb.SetSize(width, height, 1); + if (RT) + fbb.AddAttachment(RT->GetView()); + if (DS) + fbb.AddAttachment(DS->GetView()); + + const VkFramebuffer fb = fbb.Create(m_device, false); + if (fb == VK_NULL_HANDLE) + return {}; + + return std::unique_ptr(new VulkanFramebuffer(RT, DS, width, height, fb)); +} + +VulkanTextureBuffer::VulkanTextureBuffer(Format format, u32 size_in_elements) + : GPUTextureBuffer(format, size_in_elements) +{ +} + +VulkanTextureBuffer::~VulkanTextureBuffer() +{ + Destroy(true); +} + +bool VulkanTextureBuffer::CreateBuffer(bool ssbo) +{ + return m_buffer.Create(ssbo ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + GetSizeInBytes()); +} + +void VulkanTextureBuffer::Destroy(bool defer) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + if (m_buffer_view != VK_NULL_HANDLE) + { + if (defer) + dev.DeferBufferViewDestruction(m_buffer_view); + else + vkDestroyBufferView(dev.GetVulkanDevice(), m_buffer_view, nullptr); + } + if (m_descriptor_set != VK_NULL_HANDLE) + { + if (defer) + dev.DeferPersistentDescriptorSetDestruction(m_descriptor_set); + else + dev.FreePersistentDescriptorSet(m_descriptor_set); + } +} + +void* VulkanTextureBuffer::Map(u32 required_elements) +{ + const u32 esize = GetElementSize(m_format); + const u32 req_size = esize * required_elements; + if (!m_buffer.ReserveMemory(req_size, esize)) + { + VulkanDevice::GetInstance().SubmitCommandBufferAndRestartRenderPass("out of space in texture buffer"); + if (!m_buffer.ReserveMemory(req_size, esize)) + Panic("Failed to allocate texture buffer space."); + } + + m_current_position = m_buffer.GetCurrentOffset() / esize; + return m_buffer.GetCurrentHostPointer(); +} + +void VulkanTextureBuffer::Unmap(u32 used_elements) +{ + m_buffer.CommitMemory(GetElementSize(m_format) * 
used_elements); +} + +void VulkanTextureBuffer::SetDebugName(const std::string_view& name) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + Vulkan::SetObjectName(dev.GetVulkanDevice(), m_buffer.GetBuffer(), name); + if (m_buffer_view != VK_NULL_HANDLE) + Vulkan::SetObjectName(dev.GetVulkanDevice(), m_buffer_view, name); +} + +std::unique_ptr VulkanDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) +{ + static constexpr std::array(GPUTextureBuffer::Format::MaxCount)> format_mapping = {{ + VK_FORMAT_R16_UINT, // R16UI + }}; + + const bool ssbo = m_features.texture_buffers_emulated_with_ssbo; + std::unique_ptr tb = std::make_unique(format, size_in_elements); + if (!tb->CreateBuffer(ssbo)) + return {}; + + tb->m_descriptor_set = AllocatePersistentDescriptorSet(m_single_texture_buffer_ds_layout); + if (tb->m_descriptor_set == VK_NULL_HANDLE) + { + Log_ErrorPrintf("Failed to allocate persistent descriptor set for texture buffer."); + tb->Destroy(false); + return {}; + } + + Vulkan::DescriptorSetUpdateBuilder dsub; + if (ssbo) + { + dsub.AddBufferDescriptorWrite(tb->m_descriptor_set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, tb->GetBuffer(), 0, + tb->GetSizeInBytes()); + } + else + { + Vulkan::BufferViewBuilder bvb; + bvb.Set(tb->GetBuffer(), format_mapping[static_cast(format)], 0, tb->GetSizeInBytes()); + if ((tb->m_buffer_view = bvb.Create(m_device, false)) == VK_NULL_HANDLE) + { + Log_ErrorPrintf("Failed to create buffer view for texture buffer."); + tb->Destroy(false); + return {}; + } + + dsub.AddBufferViewDescriptorWrite(tb->m_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + tb->m_buffer_view); + } + dsub.Update(m_device, false); + + return tb; +} diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h new file mode 100644 index 000000000..8cfc58f9f --- /dev/null +++ b/src/util/vulkan_texture.h @@ -0,0 +1,172 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 
OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" +#include "gpu_texture.h" +#include "vulkan_loader.h" +#include "vulkan_stream_buffer.h" + +#include +#include + +class VulkanDevice; + +class VulkanTexture final : public GPUTexture +{ +public: + enum class Layout : u32 + { + Undefined, + Preinitialized, + ColorAttachment, + DepthStencilAttachment, + ShaderReadOnly, + ClearDst, + TransferSrc, + TransferDst, + TransferSelf, + PresentSrc, + FeedbackLoop, + ReadWriteImage, + ComputeReadWriteImage, + General, + Count + }; + + ~VulkanTexture() override; + + static std::unique_ptr Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, + Format format, VkFormat vk_format); + void Destroy(bool defer); + + ALWAYS_INLINE VkImage GetImage() const { return m_image; } + ALWAYS_INLINE VkImageView GetView() const { return m_view; } + ALWAYS_INLINE Layout GetLayout() const { return m_layout; } + ALWAYS_INLINE VkFormat GetVkFormat() const { return m_vk_format; } + + VkImageLayout GetVkLayout() const; + + bool IsValid() const override { return (m_image != VK_NULL_HANDLE); } + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; + bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; + void Unmap() override; + void MakeReadyForSampling() override; + + void SetDebugName(const std::string_view& name) override; + + void TransitionToLayout(Layout layout); + void CommitClear(); + void CommitClear(VkCommandBuffer cmdbuf); + + // Used when the render pass is changing the image layout, or to force it to + // VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is + // irrelevant and will not be loaded. 
+ void OverrideImageLayout(Layout new_layout); + + void TransitionToLayout(VkCommandBuffer command_buffer, Layout new_layout); + void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_layer, u32 num_layers, u32 start_level, + u32 num_levels, Layout old_layout, Layout new_layout); + + static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, u32 start_layer, + u32 num_layers, u32 start_level, u32 num_levels, Layout old_layout, + Layout new_layout); + + // Call when the texture is bound to the pipeline, or read from in a copy. + ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; } + + VkDescriptorSet GetDescriptorSetWithSampler(VkSampler sampler); + +private: + VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, VkImage image, + VmaAllocation allocation, VkImageView view, VkFormat vk_format); + + VkCommandBuffer GetCommandBufferForUpdate(); + void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; + VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height) const; + void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch, + VkBuffer buffer, u32 buffer_offset); + + VkImage m_image = VK_NULL_HANDLE; + VmaAllocation m_allocation = VK_NULL_HANDLE; + VkImageView m_view = VK_NULL_HANDLE; + VkFormat m_vk_format = VK_FORMAT_UNDEFINED; + Layout m_layout = Layout::Undefined; + + // Contains the fence counter when the texture was last used. + // When this matches the current fence counter, the texture was used this command buffer. + u64 m_use_fence_counter = 0; + + // Single-bind-point descriptor/sampler pairs. 
+ std::vector> m_descriptor_sets; + + u16 m_map_x = 0; + u16 m_map_y = 0; + u16 m_map_width = 0; + u16 m_map_height = 0; + u8 m_map_layer = 0; + u8 m_map_level = 0; +}; + +class VulkanSampler final : public GPUSampler +{ + friend VulkanDevice; + +public: + ~VulkanSampler() override; + + ALWAYS_INLINE VkSampler GetSampler() const { return m_sampler; } + + void SetDebugName(const std::string_view& name) override; + +private: + VulkanSampler(VkSampler sampler); + + VkSampler m_sampler; +}; + +class VulkanFramebuffer final : public GPUFramebuffer +{ + friend VulkanDevice; + +public: + ~VulkanFramebuffer() override; + + ALWAYS_INLINE VkFramebuffer GetFramebuffer() const { return m_framebuffer; } + + void SetDebugName(const std::string_view& name) override; + + // TODO: Maybe render passes should be in here to avoid the map lookup... + +private: + VulkanFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, VkFramebuffer fb); + + VkFramebuffer m_framebuffer; +}; + +class VulkanTextureBuffer final : public GPUTextureBuffer +{ + friend VulkanDevice; + +public: + VulkanTextureBuffer(Format format, u32 size_in_elements); + ~VulkanTextureBuffer() override; + + ALWAYS_INLINE VkBuffer GetBuffer() const { return m_buffer.GetBuffer(); } + ALWAYS_INLINE VkDescriptorSet GetDescriptorSet() const { return m_descriptor_set; } + + bool CreateBuffer(bool ssbo); + void Destroy(bool defer); + + // Inherited via GPUTextureBuffer + void* Map(u32 required_elements) override; + void Unmap(u32 used_elements) override; + + void SetDebugName(const std::string_view& name) override; + +private: + VulkanStreamBuffer m_buffer; + VkBufferView m_buffer_view = VK_NULL_HANDLE; + VkDescriptorSet m_descriptor_set = VK_NULL_HANDLE; +}; diff --git a/src/common/window_info.cpp b/src/util/window_info.cpp similarity index 76% rename from src/common/window_info.cpp rename to src/util/window_info.cpp index f2e4f0b1c..114409775 100644 --- a/src/common/window_info.cpp +++ b/src/util/window_info.cpp @@ 
-1,8 +1,11 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "window_info.h" + +#include "common/assert.h" #include "common/log.h" + Log_SetChannel(WindowInfo); void WindowInfo::SetSurfaceless() @@ -13,7 +16,7 @@ void WindowInfo::SetSurfaceless() surface_height = 0; surface_refresh_rate = 0.0f; surface_scale = 1.0f; - surface_format = SurfaceFormat::None; + surface_format = GPUTexture::Format::Unknown; #ifdef __APPLE__ surface_handle = nullptr; @@ -27,34 +30,13 @@ void WindowInfo::SetSurfaceless() static bool GetRefreshRateFromDWM(HWND hwnd, float* refresh_rate) { - static HMODULE dwm_module = nullptr; - static HRESULT(STDAPICALLTYPE * is_composition_enabled)(BOOL * pfEnabled) = nullptr; - static HRESULT(STDAPICALLTYPE * get_timing_info)(HWND hwnd, DWM_TIMING_INFO * pTimingInfo) = nullptr; - static bool load_tried = false; - if (!load_tried) - { - load_tried = true; - dwm_module = LoadLibrary("dwmapi.dll"); - if (dwm_module) - { - std::atexit([]() { - FreeLibrary(dwm_module); - dwm_module = nullptr; - }); - is_composition_enabled = - reinterpret_cast(GetProcAddress(dwm_module, "DwmIsCompositionEnabled")); - get_timing_info = - reinterpret_cast(GetProcAddress(dwm_module, "DwmGetCompositionTimingInfo")); - } - } - BOOL composition_enabled; - if (!is_composition_enabled || FAILED(is_composition_enabled(&composition_enabled) || !get_timing_info)) + if (FAILED(DwmIsCompositionEnabled(&composition_enabled))) return false; DWM_TIMING_INFO ti = {}; ti.cbSize = sizeof(ti); - HRESULT hr = get_timing_info(nullptr, &ti); + HRESULT hr = DwmGetCompositionTimingInfo(nullptr, &ti); if (SUCCEEDED(hr)) { if (ti.rateRefresh.uiNumerator == 0 || ti.rateRefresh.uiDenominator == 0) @@ -106,9 +88,53 @@ bool WindowInfo::QueryRefreshRateForWindow(const WindowInfo& wi, float* refresh_ #ifdef USE_X11 #include "common/scoped_guard.h" -#include "gl/x11_window.h" + 
+#include +#include #include +// Helper class for managing X errors +namespace { +class X11InhibitErrors; + +static X11InhibitErrors* s_current_error_inhibiter; + +class X11InhibitErrors +{ +public: + X11InhibitErrors() + { + Assert(!s_current_error_inhibiter); + m_old_handler = XSetErrorHandler(ErrorHandler); + s_current_error_inhibiter = this; + } + + ~X11InhibitErrors() + { + Assert(s_current_error_inhibiter == this); + s_current_error_inhibiter = nullptr; + XSetErrorHandler(m_old_handler); + } + + ALWAYS_INLINE bool HadError() const { return m_had_error; } + +private: + static int ErrorHandler(Display* display, XErrorEvent* ee) + { + char error_string[256] = {}; + XGetErrorText(display, ee->error_code, error_string, sizeof(error_string)); + Log_WarningPrintf("X11 Error: %s (Error %u Minor %u Request %u)", error_string, ee->error_code, ee->minor_code, + ee->request_code); + + s_current_error_inhibiter->m_had_error = true; + return 0; + } + + XErrorHandler m_old_handler = {}; + bool m_had_error = false; +}; +} // namespace + static bool GetRefreshRateFromXRandR(const WindowInfo& wi, float* refresh_rate) { Display* display = static_cast(wi.display_connection); @@ -116,7 +142,7 @@ static bool GetRefreshRateFromXRandR(const WindowInfo& wi, float* refresh_rate) if (!display || !window) return false; - GL::X11InhibitErrors inhibiter; + X11InhibitErrors inhibiter; XRRScreenResources* res = XRRGetScreenResources(display, window); if (!res) diff --git a/src/common/window_info.h b/src/util/window_info.h similarity index 81% rename from src/common/window_info.h rename to src/util/window_info.h index 9fff80f4e..2c3f0edbf 100644 --- a/src/common/window_info.h +++ b/src/util/window_info.h @@ -2,7 +2,8 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "types.h" +#include "gpu_texture.h" +#include "common/types.h" // Contains the information required to create a graphics context in a window. 
struct WindowInfo @@ -18,16 +19,6 @@ struct WindowInfo Display, }; - enum class SurfaceFormat - { - None, - Auto, - RGB8, - RGBA8, - RGB565, - Count - }; - Type type = Type::Surfaceless; void* display_connection = nullptr; void* window_handle = nullptr; @@ -35,13 +26,15 @@ struct WindowInfo u32 surface_height = 0; float surface_refresh_rate = 0.0f; float surface_scale = 1.0f; - SurfaceFormat surface_format = SurfaceFormat::RGB8; + GPUTexture::Format surface_format = GPUTexture::Format::Unknown; // Needed for macOS. #ifdef __APPLE__ void* surface_handle = nullptr; #endif + ALWAYS_INLINE bool IsSurfaceless() const { return type == Type::Surfaceless; } + // Changes the window to be surfaceless (i.e. no handle/size/etc). void SetSurfaceless();