diff --git a/.ci/clang-format.sh b/.ci/clang-format.sh index 0ccd4062..c0d8c2c2 100755 --- a/.ci/clang-format.sh +++ b/.ci/clang-format.sh @@ -10,7 +10,7 @@ if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dis fi # Default clang-format points to default 3.5 version one -CLANG_FORMAT=clang-format-17 +CLANG_FORMAT=clang-format-18 $CLANG_FORMAT --version if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a3b3ee8e..53b4c738 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,7 +18,7 @@ jobs: continue-on-error: true steps: - uses: actions/checkout@v4 - - uses: fsfe/reuse-action@v4 + - uses: fsfe/reuse-action@v5 clang-format: runs-on: ubuntu-latest @@ -30,9 +30,9 @@ jobs: - name: Install run: | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main' + sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main' sudo apt update - sudo apt install clang-format-17 + sudo apt install clang-format-18 - name: Build env: COMMIT_RANGE: ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} diff --git a/.gitignore b/.gitignore index 61d9e32e..c331f9e7 100644 --- a/.gitignore +++ b/.gitignore @@ -414,3 +414,6 @@ FodyWeavers.xsd # for macOS **/.DS_Store + +# JetBrains +.idea diff --git a/CMakeLists.txt b/CMakeLists.txt index f4b052af..e135794e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -670,8 +670,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/word_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h - src/video_core/renderer_vulkan/renderer_vulkan.cpp - src/video_core/renderer_vulkan/renderer_vulkan.h src/video_core/renderer_vulkan/vk_common.cpp src/video_core/renderer_vulkan/vk_common.h src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -690,6 +688,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_pipeline_common.h src/video_core/renderer_vulkan/vk_platform.cpp src/video_core/renderer_vulkan/vk_platform.h + src/video_core/renderer_vulkan/vk_presenter.cpp + src/video_core/renderer_vulkan/vk_presenter.h src/video_core/renderer_vulkan/vk_rasterizer.cpp src/video_core/renderer_vulkan/vk_rasterizer.h src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -934,7 +934,10 @@ if (ENABLE_QT_GUI) set_target_properties(shadps4 PROPERTIES # WIN32_EXECUTABLE ON MACOSX_BUNDLE ON - MACOSX_BUNDLE_ICON_FILE shadPS4.icns) + MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_SOURCE_DIR}/dist/MacOSBundleInfo.plist.in" + MACOSX_BUNDLE_ICON_FILE "shadPS4.icns" + MACOSX_BUNDLE_SHORT_VERSION_STRING "0.4.1" + ) set_source_files_properties(src/images/shadPS4.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources) diff --git a/REUSE.toml b/REUSE.toml index 40515623..7b2862e5 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -7,6 +7,7 @@ path = [ ".github/FUNDING.yml", ".github/shadps4.png", ".gitmodules", + "dist/MacOSBundleInfo.plist.in", "dist/net.shadps4.shadPS4.desktop", "dist/net.shadps4.shadPS4_metadata.pot", "dist/net.shadps4.shadPS4.metainfo.xml", diff --git a/dist/MacOSBundleInfo.plist.in b/dist/MacOSBundleInfo.plist.in new file mode 100644 index 00000000..70cbfb4a --- /dev/null +++ b/dist/MacOSBundleInfo.plist.in @@ -0,0 +1,46 @@ + + + + + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + APPL + + CFBundleName + shadps4 + CFBundleIdentifier + com.shadps4-emu.shadps4 + CFBundleExecutable + shadps4 + + CFBundleVersion + 1.0.0 + CFBundleShortVersionString + ${MACOSX_BUNDLE_SHORT_VERSION_STRING} + + LSMinimumSystemVersion + ${CMAKE_OSX_DEPLOYMENT_TARGET} + LSApplicationCategoryType + public.app-category.games + GCSupportsGameMode + + + NSHumanReadableCopyright + + + CFBundleIconFile + ${MACOSX_BUNDLE_ICON_FILE} + + CFBundleDevelopmentRegion + en + CFBundleAllowMixedLocalizations + + + NSPrincipalClass + NSApplication + + NSSupportsAutomaticGraphicsSwitching + + + diff --git a/src/common/discord_rpc_handler.cpp b/src/common/discord_rpc_handler.cpp index 91b278a1..448cb4a7 100644 --- a/src/common/discord_rpc_handler.cpp +++ b/src/common/discord_rpc_handler.cpp @@ -3,7 +3,7 @@ #include #include -#include "src/common/discord_rpc_handler.h" +#include "discord_rpc_handler.h" namespace DiscordRPCHandler { @@ -17,7 +17,7 @@ void RPC::init() { void RPC::setStatusIdling() { DiscordRichPresence rpc{}; - rpc.largeImageKey = "https://github.com/shadps4-emu/shadPS4/raw/main/.github/shadps4.png"; + rpc.largeImageKey = "https://cdn.jsdelivr.net/gh/shadps4-emu/shadPS4@main/.github/shadps4.png"; rpc.largeImageText = "shadPS4 is a PS4 emulator"; rpc.startTimestamp = startTimestamp; rpc.details = "Idle"; diff --git a/src/core/devtools/layer.cpp b/src/core/devtools/layer.cpp index 264b3be0..2c4ce20b 100644 --- a/src/core/devtools/layer.cpp +++ b/src/core/devtools/layer.cpp @@ -11,9 +11,12 @@ #include "imgui_internal.h" #include "layer.h" #include "options.h" +#include "video_core/renderer_vulkan/vk_presenter.h" #include "widget/frame_dump.h" #include "widget/frame_graph.h" +extern std::unique_ptr presenter; + using namespace ImGui; using namespace Core::Devtools; using L = Core::Devtools::Layer; @@ -71,6 +74,13 @@ void L::DrawMenuBar() { open_popup_help = MenuItem("Help & Tips"); ImGui::EndMenu(); } + if (BeginMenu("Display")) { + if (BeginMenu("Brightness")) { + SliderFloat("Gamma", &presenter->GetGammaRef(), 0.1f, 2.0f); + ImGui::EndMenu(); + } + ImGui::EndMenu(); + } EndMainMenuBar(); } diff --git a/src/core/devtools/widget/imgui_memory_editor.h b/src/core/devtools/widget/imgui_memory_editor.h index fb1f4676..f90387a7 100644 --- a/src/core/devtools/widget/imgui_memory_editor.h +++ b/src/core/devtools/widget/imgui_memory_editor.h @@ -929,7 +929,7 @@ struct MemoryEditor { default: case ImGuiDataType_COUNT: break; - } // Switch + } // Switch IM_ASSERT(0); // Shouldn't reach } }; diff --git a/src/core/file_format/psf.cpp b/src/core/file_format/psf.cpp index 0502f29d..7e0ffc9a 100644 --- a/src/core/file_format/psf.cpp +++ b/src/core/file_format/psf.cpp @@ -21,8 +21,13 @@ static inline u32 get_max_size(std::string_view key, u32 default_value) { } bool PSF::Open(const std::filesystem::path& filepath) { + using namespace std::chrono; if (std::filesystem::exists(filepath)) { - last_write = std::filesystem::last_write_time(filepath); + const auto t = std::filesystem::last_write_time(filepath); + const auto rel = + duration_cast(t - std::filesystem::file_time_type::clock::now()).count(); + const auto tp = system_clock::to_time_t(system_clock::now() + seconds{rel}); + last_write = system_clock::from_time_t(tp); } Common::FS::IOFile file(filepath, Common::FS::FileAccessMode::Read); @@ -99,7 +104,7 @@ bool PSF::Encode(const std::filesystem::path& filepath) const { return false; } - last_write = std::filesystem::file_time_type::clock::now(); + last_write = std::chrono::system_clock::now(); const auto psf_buffer = Encode(); const size_t written = file.Write(psf_buffer); diff --git a/src/core/file_format/psf.h b/src/core/file_format/psf.h index 6f35fa69..0f662131 100644 --- a/src/core/file_format/psf.h +++ b/src/core/file_format/psf.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -71,7 +72,7 @@ public: void AddString(std::string key, std::string value, bool update = false); void AddInteger(std::string key, s32 value, bool update = false); - [[nodiscard]] std::filesystem::file_time_type GetLastWrite() const { + [[nodiscard]] std::chrono::system_clock::time_point GetLastWrite() const { return last_write; } @@ -80,7 +81,7 @@ public: } private: - mutable std::filesystem::file_time_type last_write; + mutable std::chrono::system_clock::time_point last_write; std::vector entry_list; diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 752ab6fb..9aede330 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -18,10 +18,10 @@ #include "core/platform.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/pm4_cmds.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_presenter.h" extern Frontend::WindowSDL* g_window; -std::unique_ptr renderer; +std::unique_ptr presenter; std::unique_ptr liverpool; namespace Libraries::GnmDriver { @@ -2757,9 +2757,9 @@ int PS4_SYSV_ABI Func_F916890425496553() { } void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) { - LOG_INFO(Lib_GnmDriver, "Initializing renderer"); + LOG_INFO(Lib_GnmDriver, "Initializing presenter"); liverpool = std::make_unique(); - renderer = std::make_unique(*g_window, liverpool.get()); + presenter = std::make_unique(*g_window, liverpool.get()); const int result = sceKernelGetCompiledSdkVersion(&sdk_version); if (result != ORBIS_OK) { diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp index 01e56f8b..52abe910 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp @@ -13,6 +13,33 @@ #include "imgui/imgui_std.h" #include "savedatadialog_ui.h" +#ifdef __APPLE__ +#include + +// Need to make a copy of the formatter for std::chrono::local_time for use with date::local_time +template +struct fmt::formatter, Char> : formatter { + FMT_CONSTEXPR formatter() { + this->format_str_ = fmt::detail::string_literal(); + } + + template + auto format(date::local_time val, FormatContext& ctx) const -> decltype(ctx.out()) { + using period = typename Duration::period; + if (period::num == 1 && period::den == 1 && + !std::is_floating_point::value) { + return formatter::format( + localtime(fmt::detail::to_time_t(date::current_zone()->to_sys(val))), ctx); + } + auto epoch = val.time_since_epoch(); + auto subsecs = fmt::detail::duration_cast( + epoch - fmt::detail::duration_cast(epoch)); + return formatter::do_format( + localtime(fmt::detail::to_time_t(date::current_zone()->to_sys(val))), ctx, &subsecs); + } +}; +#endif + using namespace ImGui; using namespace Libraries::CommonDialog; using Common::ElfInfo; @@ -98,12 +125,12 @@ SaveDialogState::SaveDialogState(const OrbisSaveDataDialogParam& param) { param_sfo.Open(param_sfo_path); auto last_write = param_sfo.GetLastWrite(); -#if defined(_WIN32) && !defined(__GNUC__) && !defined(__MINGW32__) && !defined(__MINGW64__) - auto utc_time = std::chrono::file_clock::to_utc(last_write); +#ifdef __APPLE__ + auto t = date::zoned_time{date::current_zone(), last_write}; #else - auto utc_time = std::chrono::file_clock::to_sys(last_write); + auto t = std::chrono::zoned_time{std::chrono::current_zone(), last_write}; #endif - std::string date_str = fmt::format("{:%d %b, %Y %R}", utc_time); + std::string date_str = fmt::format("{:%d %b, %Y %R}", t.get_local_time()); size_t size = Common::FS::GetDirectorySize(dir_path); std::string size_str = SpaceSizeToString(size); @@ -592,7 +619,7 @@ void SaveDialogUi::DrawList() { int idx = 0; int max_idx = 0; bool is_min = pos == FocusPos::DATAOLDEST; - std::filesystem::file_time_type max_write{}; + std::chrono::system_clock::time_point max_write{}; if (state->new_item.has_value()) { idx++; } diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.h b/src/core/libraries/save_data/dialog/savedatadialog_ui.h index 3f414470..aa67e1f5 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.h +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.h @@ -248,7 +248,7 @@ public: std::string date{}; std::string size{}; - std::filesystem::file_time_type last_write{}; + std::chrono::system_clock::time_point last_write{}; PSF pfo{}; bool is_corrupted{}; }; diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 93b3c20a..c515ebcb 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -149,7 +149,7 @@ struct OrbisSaveDataIcon { size_t dataSize; std::array _reserved; - Error LoadIcon(const std::filesystem::path& icon_path) { + Error LoadIcon(const fs::path& icon_path) { try { const Common::FS::IOFile file(icon_path, Common::FS::FileAccessMode::Read); dataSize = file.GetSize(); @@ -345,7 +345,9 @@ static bool match(std::string_view str, std::string_view pattern) { if (*pat_it == '_') { // 1 character wildcard ++str_it; ++pat_it; - } else if (*pat_it != *str_it) { + continue; + } + if (*pat_it != *str_it) { return false; } ++str_it; @@ -1230,7 +1232,7 @@ Error PS4_SYSV_ABI sceSaveDataLoadIcon(const OrbisSaveDataMountPoint* mountPoint return Error::PARAMETER; } LOG_DEBUG(Lib_SaveData, "called"); - std::filesystem::path path; + fs::path path; const std::string_view mount_point_str{mountPoint->data}; for (const auto& instance : g_mount_slots) { if (instance.has_value() && instance->GetMountPoint() == mount_point_str) { @@ -1375,7 +1377,7 @@ Error PS4_SYSV_ABI sceSaveDataSaveIcon(const OrbisSaveDataMountPoint* mountPoint return Error::PARAMETER; } LOG_DEBUG(Lib_SaveData, "called"); - std::filesystem::path path; + fs::path path; const std::string_view mount_point_str{mountPoint->data}; for (const auto& instance : g_mount_slots) { if (instance.has_value() && instance->GetMountPoint() == mount_point_str) { diff --git a/src/core/libraries/system/systemservice.cpp b/src/core/libraries/system/systemservice.cpp index 9c6fe8f2..4e7f9257 100644 --- a/src/core/libraries/system/systemservice.cpp +++ b/src/core/libraries/system/systemservice.cpp @@ -1943,7 +1943,7 @@ int PS4_SYSV_ABI sceSystemServiceRaiseExceptionLocalProcess() { } s32 PS4_SYSV_ABI sceSystemServiceReceiveEvent(OrbisSystemServiceEvent* event) { - LOG_ERROR(Lib_SystemService, "(STUBBED) called, event type = {:#x}", (int)event->eventType); + LOG_ERROR(Lib_SystemService, "(STUBBED) called"); if (event == nullptr) { return ORBIS_SYSTEM_SERVICE_ERROR_PARAMETER; } diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 2e97da81..d8013614 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -12,9 +12,9 @@ #include "core/libraries/kernel/time.h" #include "core/libraries/videoout/driver.h" #include "core/platform.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_presenter.h" -extern std::unique_ptr renderer; +extern std::unique_ptr presenter; extern std::unique_ptr liverpool; namespace Libraries::VideoOut { @@ -135,7 +135,7 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co .address_right = 0, }; - renderer->RegisterVideoOutSurface(group, address); + presenter->RegisterVideoOutSurface(group, address); LOG_INFO(Lib_VideoOut, "buffers[{}] = {:#x}", i + startIndex, address); } @@ -163,9 +163,9 @@ int VideoOutDriver::UnregisterBuffers(VideoOutPort* port, s32 attributeIndex) { void VideoOutDriver::Flip(const Request& req) { // Whatever the game is rendering show splash if it is active - if (!renderer->ShowSplash(req.frame)) { + if (!presenter->ShowSplash(req.frame)) { // Present the frame. - renderer->Present(req.frame); + presenter->Present(req.frame); } // Update flip status. @@ -200,8 +200,11 @@ void VideoOutDriver::Flip(const Request& req) { } void VideoOutDriver::DrawBlankFrame() { - const auto empty_frame = renderer->PrepareBlankFrame(false); - renderer->Present(empty_frame); + if (presenter->ShowSplash(nullptr)) { + return; + } + const auto empty_frame = presenter->PrepareBlankFrame(false); + presenter->Present(empty_frame); } bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, @@ -225,7 +228,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, // point VO surface is ready to be presented, and we will need have an actual state of // Vulkan image at the time of frame presentation. liverpool->SendCommand([=, this]() { - renderer->FlushDraw(); + presenter->FlushDraw(); SubmitFlipInternal(port, index, flip_arg, is_eop); }); } else { @@ -239,11 +242,11 @@ void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_ bool is_eop /*= false*/) { Vulkan::Frame* frame; if (index == -1) { - frame = renderer->PrepareBlankFrame(is_eop); + frame = presenter->PrepareBlankFrame(is_eop); } else { const auto& buffer = port->buffer_slots[index]; const auto& group = port->groups[buffer.group_index]; - frame = renderer->PrepareFrame(group, buffer.address_left, is_eop); + frame = presenter->PrepareFrame(group, buffer.address_left, is_eop); } std::scoped_lock lock{mutex}; diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 2e478b9e..ec01b621 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -74,7 +74,7 @@ struct ServiceThreadParams { class VideoOutDriver { public: - explicit VideoOutDriver(u32 width, u32 height); + VideoOutDriver(u32 width, u32 height); ~VideoOutDriver(); int Open(const ServiceThreadParams* params); diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 31b8a21c..0258074d 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -11,6 +11,9 @@ #include "core/libraries/videoout/video_out.h" #include "core/loader/symbols_resolver.h" #include "core/platform.h" +#include "video_core/renderer_vulkan/vk_presenter.h" + +extern std::unique_ptr presenter; namespace Libraries::VideoOut { @@ -297,6 +300,28 @@ s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle) { return ORBIS_OK; } +s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma) { + if (gamma < 0.1f || gamma > 2.0f) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_VALUE; + } + settings->gamma = gamma; + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings) { + if (settings == nullptr) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_ADDRESS; + } + + auto* port = driver->GetPort(handle); + if (!port) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; + } + + presenter->GetGammaRef() = settings->gamma; + return ORBIS_OK; +} + void RegisterLib(Core::Loader::SymbolsResolver* sym) { driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); @@ -329,6 +354,10 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("U2JJtSqNKZI", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetEventId); LIB_FUNCTION("rWUTcKdkUzQ", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetEventData); + LIB_FUNCTION("DYhhWbJSeRg", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, + sceVideoOutColorSettingsSetGamma); + LIB_FUNCTION("pv9CI5VC+R0", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, + sceVideoOutAdjustColor); // openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1 LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen); diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 9568e106..0680a849 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -88,6 +88,11 @@ struct SceVideoOutDeviceCapabilityInfo { u64 capability; }; +struct SceVideoOutColorSettings { + float gamma; + u32 reserved[3]; +}; + void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, PixelFormat pixelFormat, u32 tilingMode, u32 aspectRatio, u32 width, u32 height, u32 pitchInPixel); @@ -106,6 +111,8 @@ s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 i s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle); int PS4_SYSV_ABI sceVideoOutGetEventId(const Kernel::SceKernelEvent* ev); int PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, int64_t* data); +s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma); +s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings); // Internal system functions void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr); diff --git a/src/imgui/renderer/imgui_core.cpp b/src/imgui/renderer/imgui_core.cpp index 311e86a3..47f60e8e 100644 --- a/src/imgui/renderer/imgui_core.cpp +++ b/src/imgui/renderer/imgui_core.cpp @@ -14,7 +14,7 @@ #include "imgui_internal.h" #include "sdl_window.h" #include "texture_manager.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_presenter.h" #include "imgui_fonts/notosansjp_regular.ttf.g.cpp" #include "imgui_fonts/proggyvector_regular.ttf.g.cpp" diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 959b055c..f2ee8789 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -22,6 +22,9 @@ #include "main_window.h" #include "settings_dialog.h" #include "video_core/renderer_vulkan/vk_instance.h" +#ifdef ENABLE_DISCORD_RPC +#include "common/discord_rpc_handler.h" +#endif MainWindow::MainWindow(QWidget* parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); @@ -76,12 +79,13 @@ bool MainWindow::Init() { "Games: " + QString::number(numGames) + " (" + QString::number(duration.count()) + "ms)"; statusBar->showMessage(statusMessage); - // Initialize Discord RPC +#ifdef ENABLE_DISCORD_RPC if (Config::getEnableDiscordRPC()) { auto* rpc = Common::Singleton::Instance(); rpc->init(); rpc->setStatusIdling(); } +#endif return true; } diff --git a/src/qt_gui/main_window.h b/src/qt_gui/main_window.h index 7dc85f0a..5ae2540e 100644 --- a/src/qt_gui/main_window.h +++ b/src/qt_gui/main_window.h @@ -9,7 +9,6 @@ #include "background_music_player.h" #include "common/config.h" -#include "common/discord_rpc_handler.h" #include "common/path_util.h" #include "core/file_format/psf.h" #include "core/file_sys/fs.h" diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index 83582663..abbd39ed 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -173,6 +173,7 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge BackgroundMusicPlayer::getInstance().setVolume(val); }); +#ifdef ENABLE_DISCORD_RPC connect(ui->discordRPCCheckbox, &QCheckBox::stateChanged, this, [](int val) { Config::setEnableDiscordRPC(val); auto* rpc = Common::Singleton::Instance(); @@ -183,6 +184,7 @@ SettingsDialog::SettingsDialog(std::span physical_devices, QWidge rpc->shutdown(); } }); +#endif } // Input TAB diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 9fb441f3..fd5a5889 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -225,6 +225,7 @@ public: void V_MAX3_U32(bool is_signed, const GcnInst& inst); void V_MED3_F32(const GcnInst& inst); void V_MED3_I32(const GcnInst& inst); + void V_MED3_U32(const GcnInst& inst); void V_SAD(const GcnInst& inst); void V_SAD_U32(const GcnInst& inst); void V_CVT_PK_U16_U32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 433f9dce..7ac3131c 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -357,6 +357,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MED3_F32(inst); case Opcode::V_MED3_I32: return V_MED3_I32(inst); + case Opcode::V_MED3_U32: + return V_MED3_U32(inst); case Opcode::V_SAD_U32: return V_SAD_U32(inst); case Opcode::V_CVT_PK_U16_U32: @@ -1092,6 +1094,14 @@ void Translator::V_MED3_I32(const GcnInst& inst) { SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx)); } +void Translator::V_MED3_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + const IR::U32 mmx = ir.UMin(ir.UMax(src0, src1), src2); + SetDst(inst.dst[0], ir.UMax(ir.UMin(src0, src1), mmx)); +} + void Translator::V_SAD(const GcnInst& inst) { const IR::U32 abs_diff = ir.IAbs(ir.ISub(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); SetDst(inst.dst[0], ir.IAdd(abs_diff, GetSrc(inst.src[2]))); diff --git a/src/shader_recompiler/ir/breadth_first_search.h b/src/shader_recompiler/ir/breadth_first_search.h index b042ae3d..b3ed8720 100644 --- a/src/shader_recompiler/ir/breadth_first_search.h +++ b/src/shader_recompiler/ir/breadth_first_search.h @@ -14,8 +14,8 @@ namespace Shader::IR { // Use typename Instruction so the function can be used to return either const or mutable // Insts depending on the context. template -auto BreadthFirstSearch(Instruction* inst, Pred&& pred) - -> std::invoke_result_t { +auto BreadthFirstSearch(Instruction* inst, + Pred&& pred) -> std::invoke_result_t { // Most often case the instruction is the desired already. if (std::optional result = pred(inst)) { return result; @@ -53,8 +53,8 @@ auto BreadthFirstSearch(Instruction* inst, Pred&& pred) } template -auto BreadthFirstSearch(const Value& value, Pred&& pred) - -> std::invoke_result_t { +auto BreadthFirstSearch(const Value& value, + Pred&& pred) -> std::invoke_result_t { if (value.IsImmediate()) { // Nothing to do with immediates return std::nullopt; diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h index 200d7f42..be640297 100644 --- a/src/shader_recompiler/ir/opcodes.h +++ b/src/shader_recompiler/ir/opcodes.h @@ -53,7 +53,7 @@ constexpr Type F64x3{Type::F64x3}; constexpr Type F64x4{Type::F64x4}; constexpr Type StringLiteral{Type::StringLiteral}; -constexpr OpcodeMeta META_TABLE[]{ +constexpr OpcodeMeta META_TABLE[] { #define OPCODE(name_token, type_token, ...) \ { \ .name{#name_token}, \ diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 19579f66..64f842c4 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -32,7 +32,9 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info const RuntimeInfo& runtime_info, const Profile& profile) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; - ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); + if (code[0] != token_mov_vcchi) { + LOG_WARNING(Render_Recompiler, "First instruction is not s_mov_b32 vcc_hi, #imm"); + } Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size()); Gcn::GcnDecodeContext decoder; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index d94e4329..0595a242 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -88,6 +88,32 @@ struct Liverpool { } }; + static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) { + constexpr u32 token_mov_vcchi = 0xBEEB03FF; + + if (code[0] == token_mov_vcchi) { + const auto* info = std::bit_cast(code + (code[1] + 1) * 2); + if (info->Valid()) { + return *info; + } + } + + // First instruction is not s_mov_b32 vcc_hi, #imm, + // which means we cannot get the binary info via said instruction. + // The easiest solution is to iterate through each dword and break + // on the first instance of the binary info. + constexpr size_t signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8); + const u32* end = code + search_limit; + + for (const u32* it = code; it < end; ++it) { + if (const BinaryInfo* info = std::bit_cast(it); info->Valid()) { + return *info; + } + } + + UNREACHABLE_MSG("Shader binary info not found."); + } + struct ShaderProgram { u32 address_lo; BitField<0, 8, u32> address_hi; @@ -113,8 +139,7 @@ struct Liverpool { std::span Code() const { const u32* code = Address(); - BinaryInfo bininfo; - std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + const BinaryInfo& bininfo = SearchBinaryInfo(code); const u32 num_dwords = bininfo.length / sizeof(u32); return std::span{code, num_dwords}; } @@ -166,27 +191,24 @@ struct Liverpool { std::span Code() const { const u32* code = Address(); - BinaryInfo bininfo; - std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + const BinaryInfo& bininfo = SearchBinaryInfo(code); const u32 num_dwords = bininfo.length / sizeof(u32); return std::span{code, num_dwords}; } }; template - static constexpr auto* GetBinaryInfo(const Shader& sh) { + static constexpr const BinaryInfo& GetBinaryInfo(const Shader& sh) { const auto* code = sh.template Address(); - const auto* bininfo = std::bit_cast(code + (code[1] + 1) * 2); - // ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header"); - return bininfo; + return SearchBinaryInfo(code); } static constexpr Shader::ShaderParams GetParams(const auto& sh) { - auto* bininfo = GetBinaryInfo(sh); + auto& bininfo = GetBinaryInfo(sh); return { .user_data = sh.user_data, .code = sh.Code(), - .hash = bininfo->shader_hash, + .hash = bininfo.shader_hash, }; } diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f2b6cc2d..4ef8bcdb 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -7,6 +7,8 @@ set(SHADER_FILES detile_m32x1.comp detile_m32x2.comp detile_m32x4.comp + fs_tri.vert + post_process.frag ) set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) diff --git a/src/video_core/host_shaders/fs_tri.vert b/src/video_core/host_shaders/fs_tri.vert new file mode 100644 index 00000000..7b82c11a --- /dev/null +++ b/src/video_core/host_shaders/fs_tri.vert @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout(location = 0) out vec2 uv; + +void main() { + vec2 pos = vec2( + float((gl_VertexIndex & 1u) << 2u), + float((gl_VertexIndex & 2u) << 1u) + ); + gl_Position = vec4(pos - vec2(1.0, 1.0), 0.0, 1.0); + uv = pos * 0.5; +} diff --git a/src/video_core/host_shaders/post_process.frag b/src/video_core/host_shaders/post_process.frag new file mode 100644 index 00000000..fcced323 --- /dev/null +++ b/src/video_core/host_shaders/post_process.frag @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout (location = 0) in vec2 uv; +layout (location = 0) out vec4 color; + +layout (binding = 0) uniform sampler2D texSampler; + +layout(push_constant) uniform settings { + float gamma; +} pp; + +void main() +{ + vec4 color_linear = texture(texSampler, uv); + color = pow(color_linear, vec4(1.0/(2.2 + 1.0 - pp.gamma))); +} diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index f42c5829..258e7f39 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -652,7 +652,7 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu } vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) { + Liverpool::ColorBuffer::SwapMode comp_swap) { const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse; const bool comp_swap_alt_reverse = @@ -664,9 +664,9 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, case vk::Format::eB8G8R8A8Unorm: return vk::Format::eR8G8B8A8Unorm; case vk::Format::eR8G8B8A8Srgb: - return is_vo_surface ? vk::Format::eB8G8R8A8Unorm : vk::Format::eB8G8R8A8Srgb; + return vk::Format::eB8G8R8A8Srgb; case vk::Format::eB8G8R8A8Srgb: - return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb; + return vk::Format::eR8G8B8A8Srgb; case vk::Format::eA2B10G10R10UnormPack32: return vk::Format::eA2R10G10B10UnormPack32; default: @@ -677,20 +677,10 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, case vk::Format::eR8G8B8A8Unorm: return vk::Format::eA8B8G8R8UnormPack32; case vk::Format::eR8G8B8A8Srgb: - return is_vo_surface ? vk::Format::eA8B8G8R8UnormPack32 - : vk::Format::eA8B8G8R8SrgbPack32; + return vk::Format::eA8B8G8R8SrgbPack32; default: break; } - } else if (comp_swap_alt_reverse) { - return base_format; - } else { - if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) { - return vk::Format::eR8G8B8A8Unorm; - } - if (is_vo_surface && base_format == vk::Format::eB8G8R8A8Srgb) { - return vk::Format::eB8G8R8A8Unorm; - } } return base_format; } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 5fb04e5f..70e707fa 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -51,7 +51,7 @@ std::span SurfaceFormats(); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); vk::Format AdjustColorBufferFormat(vk::Format base_format, - Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface); + Liverpool::ColorBuffer::SwapMode comp_swap); struct DepthFormatInfo { Liverpool::DepthBuffer::ZFormat z_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index da098fa3..b576d478 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,13 +11,13 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_presenter.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -extern std::unique_ptr renderer; +extern std::unique_ptr presenter; namespace Vulkan { @@ -265,9 +265,8 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto base_format = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); - key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + key.color_formats[remapped_cb] = + LiverpoolToVK::AdjustColorBufferFormat(base_format, col_buf.info.comp_swap.Value()); key.color_num_formats[remapped_cb] = col_buf.NumFormat(); if (base_format == key.color_formats[remapped_cb]) { key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); @@ -293,8 +292,8 @@ bool PipelineCache::RefreshGraphicsKey() { return false; } - const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); - if (!bininfo->Valid()) { + const auto& bininfo = Liverpool::GetBinaryInfo(*pgm); + if (!bininfo.Valid()) { LOG_WARNING(Render_Vulkan, "Invalid binary info structure!"); key.stage_hashes[stage_out_idx] = 0; infos[stage_out_idx] = nullptr; @@ -358,6 +357,8 @@ bool PipelineCache::RefreshGraphicsKey() { } } + u32 num_samples = 1u; + // Second pass to fill remain CB pipeline key data for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -374,8 +375,16 @@ bool PipelineCache::RefreshGraphicsKey() { key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)}; key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); + num_samples = std::max(num_samples, 1u << col_buf.attrib.num_samples_log2); + ++remapped_cb; } + + // It seems that the number of samples > 1 set in the AA config doesn't mean we're always + // rendering with MSAA, so we need to derive MS ratio from the CB settings. + num_samples = std::max(num_samples, regs.depth_buffer.NumSamples()); + key.num_samples = num_samples; + return true; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp similarity index 51% rename from src/video_core/renderer_vulkan/renderer_vulkan.cpp rename to src/video_core/renderer_vulkan/vk_presenter.cpp index 64a48365..d4972cba 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -4,18 +4,21 @@ #include "common/config.h" #include "common/debug.h" #include "common/singleton.h" +#include "core/debug_state.h" +#include "core/devtools/layer.h" #include "core/file_format/splash.h" #include "core/libraries/system/systemservice.h" #include "imgui/renderer/imgui_core.h" #include "sdl_window.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_presenter.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/image.h" -#include +#include "video_core/host_shaders/fs_tri_vert.h" +#include "video_core/host_shaders/post_process_frag.h" -#include "core/debug_state.h" -#include "core/devtools/layer.h" +#include namespace Vulkan { @@ -92,7 +95,203 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for return MakeImageBlit(frame_width, frame_height, dst_width, dst_height, offset_x, offset_y); } -RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_) +static vk::Format FormatToUnorm(vk::Format fmt) { + switch (fmt) { + case vk::Format::eR8G8B8A8Srgb: + return vk::Format::eR8G8B8A8Unorm; + case vk::Format::eB8G8R8A8Srgb: + return vk::Format::eB8G8R8A8Unorm; + default: + UNREACHABLE(); + } +} + +void Presenter::CreatePostProcessPipeline() { + static const std::array pp_shaders{ + HostShaders::FS_TRI_VERT, + HostShaders::POST_PROCESS_FRAG, + }; + + boost::container::static_vector bindings{ + { + .binding = 0, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eFragment, + }, + }; + + const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { + .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + auto desc_layout_result = instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); + ASSERT_MSG(desc_layout_result.result == vk::Result::eSuccess, + "Failed to create descriptor set layout: {}", + vk::to_string(desc_layout_result.result)); + pp_desc_set_layout = std::move(desc_layout_result.value); + + const vk::PushConstantRange push_constants = { + .stageFlags = vk::ShaderStageFlagBits::eFragment, + .offset = 0, + .size = sizeof(PostProcessSettings), + }; + + const auto& vs_module = + Vulkan::Compile(pp_shaders[0], vk::ShaderStageFlagBits::eVertex, instance.GetDevice()); + ASSERT(vs_module); + Vulkan::SetObjectName(instance.GetDevice(), vs_module, "fs_tri.vert"); + + const auto& fs_module = + Vulkan::Compile(pp_shaders[1], vk::ShaderStageFlagBits::eFragment, instance.GetDevice()); + ASSERT(fs_module); + Vulkan::SetObjectName(instance.GetDevice(), vs_module, "post_process.frag"); + + const std::array shaders_ci{ + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = vs_module, + .pName = "main", + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = fs_module, + .pName = "main", + }, + }; + + const vk::DescriptorSetLayout set_layout = *pp_desc_set_layout; + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1U, + .pSetLayouts = &set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constants, + }; + auto [layout_result, layout] = instance.GetDevice().createPipelineLayoutUnique(layout_info); + ASSERT_MSG(layout_result == vk::Result::eSuccess, "Failed to create pipeline layout: {}", + vk::to_string(layout_result)); + pp_pipeline_layout = std::move(layout); + + const std::array pp_color_formats{ + vk::Format::eB8G8R8A8Unorm, // swapchain.GetSurfaceFormat().format, + }; + const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { + .colorAttachmentCount = 1u, + .pColorAttachmentFormats = pp_color_formats.data(), + }; + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = 0u, + .vertexAttributeDescriptionCount = 0u, + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = vk::PrimitiveTopology::eTriangleList, + }; + + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = 1.0f, + .height = 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {1, 1}, + }; + + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = vk::PolygonMode::eFill, + .cullMode = vk::CullModeFlagBits::eBack, + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = false, + .lineWidth = 1.0f, + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + }; + + const std::array attachments{ + vk::PipelineColorBlendAttachmentState{ + .blendEnable = false, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + }, + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .logicOp = vk::LogicOp::eCopy, + .attachmentCount = attachments.size(), + .pAttachments = attachments.data(), + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, + }; + + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .pNext = &pipeline_rendering_ci, + .stageCount = static_cast(shaders_ci.size()), + .pStages = shaders_ci.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = *pp_pipeline_layout, + }; + + auto result = instance.GetDevice().createGraphicsPipelineUnique( + /*pipeline_cache*/ {}, pipeline_info); + if (result.result == vk::Result::eSuccess) { + pp_pipeline = std::move(result.value); + } else { + UNREACHABLE_MSG("Post process pipeline creation failed!"); + } + + // Once pipeline is compiled, we don't need the shader module anymore + instance.GetDevice().destroyShaderModule(vs_module); + instance.GetDevice().destroyShaderModule(fs_module); + + // Create sampler resource + const vk::SamplerCreateInfo sampler_ci = { + .magFilter = vk::Filter::eLinear, + .minFilter = vk::Filter::eLinear, + .mipmapMode = vk::SamplerMipmapMode::eNearest, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + }; + auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci); + ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}", + vk::to_string(sampler_result)); + pp_sampler = std::move(smplr); +} + +Presenter::Presenter(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_) : window{window_}, liverpool{liverpool_}, instance{window, Config::getGpuId(), Config::vkValidationEnabled(), Config::vkCrashDiagnosticEnabled()}, @@ -115,12 +314,15 @@ RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* free_queue.push(&frame); } + CreatePostProcessPipeline(); + // Setup ImGui - ImGui::Core::Initialize(instance, window, num_images, swapchain.GetSurfaceFormat().format); + ImGui::Core::Initialize(instance, window, num_images, + FormatToUnorm(swapchain.GetSurfaceFormat().format)); ImGui::Layer::AddLayer(Common::Singleton::Instance()); } -RendererVulkan::~RendererVulkan() { +Presenter::~Presenter() { ImGui::Layer::RemoveLayer(Common::Singleton::Instance()); draw_scheduler.Finish(); const vk::Device device = instance.GetDevice(); @@ -132,7 +334,7 @@ RendererVulkan::~RendererVulkan() { ImGui::Core::Shutdown(device); } -void RendererVulkan::RecreateFrame(Frame* frame, u32 width, u32 height) { +void Presenter::RecreateFrame(Frame* frame, u32 width, u32 height) { const vk::Device device = instance.GetDevice(); if (frame->image_view) { device.destroyImageView(frame->image_view); @@ -143,6 +345,7 @@ void RendererVulkan::RecreateFrame(Frame* frame, u32 width, u32 height) { const vk::Format format = swapchain.GetSurfaceFormat().format; const vk::ImageCreateInfo image_info = { + .flags = vk::ImageCreateFlagBits::eMutableFormat, .imageType = vk::ImageType::e2D, .format = format, .extent = {width, height, 1}, @@ -177,7 +380,7 @@ void RendererVulkan::RecreateFrame(Frame* frame, u32 width, u32 height) { const vk::ImageViewCreateInfo view_info = { .image = frame->image, .viewType = vk::ImageViewType::e2D, - .format = format, + .format = FormatToUnorm(format), .subresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, .baseMipLevel = 0, @@ -194,7 +397,7 @@ void RendererVulkan::RecreateFrame(Frame* frame, u32 width, u32 height) { frame->height = height; } -bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { +bool Presenter::ShowSplash(Frame* frame /*= nullptr*/) { const auto* splash = Common::Singleton::Instance(); if (splash->GetImageData().empty()) { return false; @@ -204,6 +407,9 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { return false; } + draw_scheduler.EndRendering(); + const auto cmdbuf = draw_scheduler.CommandBuffer(); + if (!frame) { if (!splash_img.has_value()) { VideoCore::ImageInfo info{}; @@ -214,16 +420,74 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { info.pitch = splash->GetImageInfo().width; info.guest_address = VAddr(splash->GetImageData().data()); info.guest_size_bytes = splash->GetImageData().size(); + info.mips_layout.emplace_back(splash->GetImageData().size(), + splash->GetImageInfo().width, + splash->GetImageInfo().height, 0); splash_img.emplace(instance, present_scheduler, info); texture_cache.RefreshImage(*splash_img); + + splash_img->Transit(vk::ImageLayout::eTransferSrcOptimal, + vk::AccessFlagBits2::eTransferRead, {}, cmdbuf); } - frame = PrepareFrameInternal(*splash_img); + + frame = GetRenderFrame(); } + + const auto frame_subresources = vk::ImageSubresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + const auto pre_barrier = + vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .image = frame->image, + .subresourceRange{frame_subresources}}; + + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &pre_barrier, + }); + + cmdbuf.blitImage(splash_img->image, vk::ImageLayout::eTransferSrcOptimal, frame->image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageBlitFit(splash->GetImageInfo().width, splash->GetImageInfo().height, + frame->width, frame->height), + vk::Filter::eLinear); + + const auto post_barrier = + vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .image = frame->image, + .subresourceRange{frame_subresources}}; + + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &post_barrier, + }); + + // Flush frame creation commands. + frame->ready_semaphore = draw_scheduler.GetMasterSemaphore()->Handle(); + frame->ready_tick = draw_scheduler.CurrentTick(); + SubmitInfo info{}; + draw_scheduler.Flush(info); + Present(frame); return true; } -Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop) { +Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); @@ -234,9 +498,6 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}, - cmdbuf); - const auto frame_subresources = vk::ImageSubresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, .baseMipLevel = 0, @@ -244,62 +505,114 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop .baseArrayLayer = 0, .layerCount = VK_REMAINING_ARRAY_LAYERS, }; - const std::array pre_barrier{ - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferRead, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = vk::ImageLayout::eUndefined, - .newLayout = vk::ImageLayout::eTransferDstOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = frame->image, - .subresourceRange{frame_subresources}, - }, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, - {}, {}, pre_barrier); - // Clear the frame image before blitting to avoid artifacts. - const vk::ClearColorValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}}; - cmdbuf.clearColorImage(frame->image, vk::ImageLayout::eTransferDstOptimal, clear_color, - frame_subresources); - - const auto blitBarrier = + const auto pre_barrier = vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eColorAttachmentOptimal, .image = frame->image, .subresourceRange{frame_subresources}}; cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .imageMemoryBarrierCount = 1, - .pImageMemoryBarriers = &blitBarrier, + .pImageMemoryBarriers = &pre_barrier, }); - // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image. - cmdbuf.blitImage(image.image, image.last_state.layout, frame->image, - vk::ImageLayout::eTransferDstOptimal, - MakeImageBlitFit(image.info.size.width, image.info.size.height, frame->width, - frame->height), - vk::Filter::eLinear); + if (image_id != VideoCore::NULL_IMAGE_ID) { + auto& image = texture_cache.GetImage(image_id); + image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {}, + cmdbuf); - const vk::ImageMemoryBarrier post_barrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::eGeneral, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = frame->image, - .subresourceRange{frame_subresources}, - }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + static vk::DescriptorImageInfo image_info{ + .sampler = *pp_sampler, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + }; + + VideoCore::ImageViewInfo info{}; + info.format = image.info.pixel_format; + if (auto view = image.FindView(info)) { + image_info.imageView = *texture_cache.GetImageView(view).image_view; + } else { + image_info.imageView = *texture_cache.RegisterImageView(image_id, info).image_view; + } + + static const std::array set_writes{ + vk::WriteDescriptorSet{ + .dstSet = VK_NULL_HANDLE, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .pImageInfo = &image_info, + }, + }; + + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *pp_pipeline); + + const std::array viewports = { + vk::Viewport{ + .x = 0.0f, + .y = 0.0f, + .width = 1.0f * frame->width, + .height = 1.0f * frame->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }, + }; + + const std::array scissors = { + vk::Rect2D{ + .offset = {0, 0}, + .extent = {frame->width, frame->height}, + }, + }; + cmdbuf.setViewport(0, viewports); + cmdbuf.setScissor(0, scissors); + + cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pp_pipeline_layout, 0, + set_writes); + cmdbuf.pushConstants(*pp_pipeline_layout, vk::ShaderStageFlagBits::eFragment, 0, + sizeof(PostProcessSettings), &pp_settings); + + const std::array attachments = {vk::RenderingAttachmentInfo{ + .imageView = frame->image_view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = vk::AttachmentLoadOp::eDontCare, + .storeOp = vk::AttachmentStoreOp::eStore, + }}; + + vk::RenderingInfo rendering_info{ + .renderArea = + vk::Rect2D{ + .offset = {0, 0}, + .extent = {frame->width, frame->height}, + }, + .layerCount = 1, + .colorAttachmentCount = attachments.size(), + .pColorAttachments = attachments.data(), + }; + cmdbuf.beginRendering(rendering_info); + cmdbuf.draw(3, 1, 0, 0); + cmdbuf.endRendering(); + } + + const auto post_barrier = + vk::ImageMemoryBarrier2{.srcStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .srcAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eColorAttachmentOutput, + .dstAccessMask = vk::AccessFlagBits2::eColorAttachmentWrite, + .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .image = frame->image, + .subresourceRange{frame_subresources}}; + + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &post_barrier, + }); // Flush frame creation commands. frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle(); @@ -309,7 +622,7 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop return frame; } -void RendererVulkan::Present(Frame* frame) { +void Presenter::Present(Frame* frame) { // Recreate the swapchain if the window was resized. if (window.getWidth() != swapchain.GetExtent().width || window.getHeight() != swapchain.GetExtent().height) { @@ -423,7 +736,7 @@ void RendererVulkan::Present(Frame* frame) { DebugState.IncFlipFrameNum(); } -Frame* RendererVulkan::GetRenderFrame() { +Frame* Presenter::GetRenderFrame() { // Wait for free presentation frames Frame* frame; { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/vk_presenter.h similarity index 79% rename from src/video_core/renderer_vulkan/renderer_vulkan.h rename to src/video_core/renderer_vulkan/vk_presenter.h index a663622f..cb44a352 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -40,23 +40,29 @@ enum SchedulerType { class Rasterizer; -class RendererVulkan { +class Presenter { + struct PostProcessSettings { + float gamma = 1.0f; + }; + public: - explicit RendererVulkan(Frontend::WindowSDL& window, AmdGpu::Liverpool* liverpool); - ~RendererVulkan(); + Presenter(Frontend::WindowSDL& window, AmdGpu::Liverpool* liverpool); + ~Presenter(); + + float& GetGammaRef() { + return pp_settings.gamma; + } Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address, bool is_eop) { const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(info); texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); - auto& image = texture_cache.GetImage(image_id); - return PrepareFrameInternal(image, is_eop); + return PrepareFrameInternal(image_id, is_eop); } Frame* PrepareBlankFrame(bool is_eop) { - auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID); - return PrepareFrameInternal(image, is_eop); + return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, is_eop); } VideoCore::Image& RegisterVideoOutSurface( @@ -83,10 +89,16 @@ public: } private: - Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); + void CreatePostProcessPipeline(); + Frame* PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop = true); Frame* GetRenderFrame(); private: + PostProcessSettings pp_settings{}; + vk::UniquePipeline pp_pipeline{}; + vk::UniquePipelineLayout pp_pipeline_layout{}; + vk::UniqueDescriptorSetLayout pp_desc_set_layout{}; + vk::UniqueSampler pp_sampler{}; Frontend::WindowSDL& window; AmdGpu::Liverpool* liverpool; Instance instance; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 27120323..a8b4728c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -271,7 +271,7 @@ void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) { const auto& hint = liverpool->last_cb_extent[col_buf_id]; VideoCore::ImageInfo image_info{col_buf, hint}; - VideoCore::ImageViewInfo view_info{col_buf, false /*!!image.info.usage.vo_buffer*/}; + VideoCore::ImageViewInfo view_info{col_buf}; const auto& image_view = texture_cache.FindRenderTarget(image_info, view_info); const auto& image = texture_cache.GetImage(image_view.image_id); state.width = std::min(state.width, image.info.size.width); diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 055753db..2956a2a3 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -47,7 +47,7 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { // clang-format off // The table of macro tiles parameters for given tiling index (row) and bpp (column) -static constexpr std::array macro_tile_extents{ +static constexpr std::array macro_tile_extents_x1{ std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 @@ -66,26 +66,123 @@ static constexpr std::array macro_tile_extents{ std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10 std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11 - std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 12 + std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 12 std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 - std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{128u, 64u}, // 18 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A }; + +static constexpr std::array macro_tile_extents_x2{ + std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11 + std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A +}; + +static constexpr std::array macro_tile_extents_x4{ + std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A +}; + +static constexpr std::array macro_tile_extents_x8{ + std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11 + std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12 + std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14 + std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17 + std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19 + std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A +}; + +static constexpr std::array macro_tile_extents{ + macro_tile_extents_x1, + macro_tile_extents_x2, + macro_tile_extents_x4, + macro_tile_extents_x8, +}; // clang-format on static constexpr std::pair micro_tile_extent{8u, 8u}; static constexpr auto hw_pipe_interleave = 256u; static constexpr std::pair GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) { - ASSERT(num_samples == 1); + ASSERT(num_samples <= 8); const auto row = tiling_idx * 5; const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128 - return macro_tile_extents[row + column]; + return (macro_tile_extents[std::log2(num_samples)])[row + column]; } static constexpr std::pair ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp, @@ -116,9 +213,21 @@ static constexpr std::pair ImageSizeMicroTiled(u32 pitch, u32 heigh } static constexpr std::pair ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp, - u32 num_samples, u32 tiling_idx) { + u32 num_samples, u32 tiling_idx, + u32 mip_n) { const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples); ASSERT(pitch_align != 0 && height_align != 0); + bool downgrade_to_micro = false; + if (mip_n > 0) { + const bool is_less_than_tile = pitch < pitch_align || height < height_align; + // TODO: threshold check + downgrade_to_micro = is_less_than_tile; + } + + if (downgrade_to_micro) { + return ImageSizeMicroTiled(pitch, height, bpp, num_samples); + } + const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1); const auto height_aligned = (height + height_align - 1) & ~(height_align - 1); const auto log_sz = pitch_aligned * height_aligned * num_samples; @@ -274,9 +383,8 @@ void ImageInfo::UpdateSize() { case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Depth_MacroTiled: { ASSERT(!props.is_block); - ASSERT(num_samples == 1); std::tie(mip_info.pitch, mip_info.size) = - ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx); + ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx, mip); break; } default: { diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 7dbf1230..8bde3794 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -125,14 +125,14 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso } } -ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, - bool is_vo_surface) noexcept { +ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept { const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); range.base.layer = col_buffer.view.slice_start; range.extent.layers = col_buffer.NumSlices() - range.base.layer; - format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( - base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); + type = range.extent.layers > 1 ? vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; + format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat(base_format, + col_buffer.info.comp_swap.Value()); } ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, @@ -143,6 +143,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, is_storage = ctl.depth_write_enable; range.base.layer = view.slice_start; range.extent.layers = view.NumSlices() - range.base.layer; + type = range.extent.layers > 1 ? vk::ImageViewType::e2DArray : vk::ImageViewType::e2D; } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index ba8d2c72..23c703d2 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -19,7 +19,7 @@ namespace VideoCore { struct ImageViewInfo { ImageViewInfo() = default; ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept; - ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept; + ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer) noexcept; ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, AmdGpu::Liverpool::DepthView view, AmdGpu::Liverpool::DepthControl ctl); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 12bcb9f6..0e8dd7cc 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -28,7 +28,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& info.num_bits = 32; info.UpdateSize(); const ImageId null_id = slot_images.insert(instance, scheduler, info); - ASSERT(null_id.index == 0); + ASSERT(null_id.index == NULL_IMAGE_ID.index); const vk::Image& null_image = slot_images[null_id].image; Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image"); slot_images[null_id].flags = ImageFlagBits::Tracked; @@ -36,7 +36,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& ImageViewInfo view_info; const auto null_view_id = slot_image_views.insert(instance, view_info, slot_images[null_id], null_id); - ASSERT(null_view_id.index == 0); + ASSERT(null_view_id.index == NULL_IMAGE_VIEW_ID.index); const vk::ImageView& null_image_view = slot_image_views[null_view_id].image_view.get(); Vulkan::SetObjectName(instance.GetDevice(), null_image_view, "Null Image View"); } @@ -376,6 +376,10 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule return; } + if (image.info.num_samples > 1) { + return; + } + const auto& num_layers = image.info.resources.layers; const auto& num_mips = image.info.resources.levels; ASSERT(num_mips == image.info.mips_layout.size()); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cafe7055..96970bfc 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -102,6 +102,9 @@ public: return slot_image_views[id]; } + /// Registers an image view for provided image + ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info); + bool IsMeta(VAddr address) const { return surface_metas.contains(address); } @@ -181,9 +184,6 @@ private: } } - /// Registers an image view for provided image - ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info); - /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);