diff --git a/CMakeLists.txt b/CMakeLists.txt index 6cb38fb017..28ce455b8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -650,6 +650,8 @@ add_library(Common STATIC Common/Math/lin/vec3.h Common/Math/math_util.cpp Common/Math/math_util.h + Common/Math/Statistics.h + Common/Math/Statistics.cpp Common/Net/HTTPClient.cpp Common/Net/HTTPClient.h Common/Net/HTTPHeaders.cpp diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index 901006a0d9..aa0f9bb1e7 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -466,6 +466,7 @@ + @@ -886,6 +887,7 @@ + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 991b03a71f..0cb9e79423 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -422,6 +422,9 @@ GPU\Vulkan + + Math + @@ -797,6 +800,9 @@ GPU\Vulkan + + Math + diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index c1da294a0f..1696487d5c 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -515,6 +515,8 @@ void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { } MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea); steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType); + steps[i]->render.numDraws += steps[j]->render.numDraws; + steps[i]->render.numReads += steps[j]->render.numReads; // Cheaply skip the first step. steps[j]->stepType = VKRStepType::RENDER_SKIP; break; @@ -936,6 +938,8 @@ void VulkanQueueRunner::ApplyRenderPassMerge(std::vector &steps) { // So we don't consider it for other things, maybe doesn't matter. 
src->dependencies.clear(); src->stepType = VKRStepType::RENDER_SKIP; + dst->render.numDraws += src->render.numDraws; + dst->render.numReads += src->render.numReads; dst->render.pipelineFlags |= src->render.pipelineFlags; dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType); }; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 44a4455d98..7c7d8418c6 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -327,7 +327,12 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int img.tag = tag ? tag : "N/A"; } -VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) { +VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) + : vulkan_(vulkan), queueRunner_(vulkan), + initTimeMs_("initTimeMs"), + totalGPUTimeMs_("totalGPUTimeMs"), + renderCPUTimeMs_("renderCPUTimeMs") +{ inflightFramesAtStart_ = vulkan_->GetInflightFrames(); frameDataShared_.Init(vulkan); @@ -579,14 +584,25 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile std::stringstream str; char line[256]; - snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor)); + totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor)); + totalGPUTimeMs_.Format(line, sizeof(line)); str << line; - snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0); + renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0); + renderCPUTimeMs_.Format(line, sizeof(line)); str << line; for (int i = 0; i < numQueries - 1; i++) { uint64_t diff = (queryResults[i + 1] - queryResults[i]) 
& timestampDiffMask; double milliseconds = (double)diff * timestampConversionFactor; - snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds); + + // Can't use SimpleStat for these very easily since these are dynamic per frame. + // Only the first one is static, the initCmd. + // Could try some hashtable tracking for the rest, later. + if (i == 0) { + initTimeMs_.Update(milliseconds); + initTimeMs_.Format(line, sizeof(line)); + } else { + snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds); + } str << line; } frameData.profile.profileSummary = str.str(); @@ -1344,3 +1360,9 @@ void VulkanRenderManager::FlushSync() { frameData.syncDone = false; } } + +void VulkanRenderManager::ResetStats() { + initTimeMs_.Reset(); + totalGPUTimeMs_.Reset(); + renderCPUTimeMs_.Reset(); +} diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index e1f66c29e2..7ba98b485c 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -12,6 +12,7 @@ #include #include +#include "Common/Math/Statistics.h" #include "Common/Thread/Promise.h" #include "Common/System/Display.h" #include "Common/GPU/Vulkan/VulkanContext.h" @@ -465,6 +466,8 @@ public: return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES; } + void ResetStats(); + private: void EndCurRenderStep(); @@ -535,4 +538,9 @@ private: // pipelines to check and possibly create at the end of the current render pass. std::vector pipelinesToCheck_; + + // For nicer output in the little internal GPU profiler. 
+ SimpleStat initTimeMs_; + SimpleStat totalGPUTimeMs_; + SimpleStat renderCPUTimeMs_; }; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 6c342f8e9e..dc1b806011 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -455,6 +455,10 @@ public: void FlushState() override {} + void ResetStats() override { + renderManager_.ResetStats(); + } + std::string GetInfoString(InfoField info) const override { // TODO: Make these actually query the right information switch (info) { diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 3e482687a3..132c0e02f9 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -735,6 +735,9 @@ public: // Flush state like scissors etc so the caller can do its own custom drawing. virtual void FlushState() {} + // This is called when we launch a new game, so any collected internal stats in the backends don't carry over. + virtual void ResetStats() {} + virtual int GetCurrentStepId() const = 0; protected: diff --git a/Common/Math/Statistics.cpp b/Common/Math/Statistics.cpp new file mode 100644 index 0000000000..c05dd84e85 --- /dev/null +++ b/Common/Math/Statistics.cpp @@ -0,0 +1,11 @@ +#include + +#include "Common/Math/Statistics.h" + +void SimpleStat::Format(char *buffer, size_t sz) { + if (min_ == INFINITY) { + snprintf(buffer, sz, "%s: N/A\n", name_); + } else { + snprintf(buffer, sz, "%s: %0.2f (%0.2f..%0.2f, avg %0.2f)\n", name_, value_, min_, max_, smoothed_); + } +} diff --git a/Common/Math/Statistics.h b/Common/Math/Statistics.h new file mode 100644 index 0000000000..0b03d1d420 --- /dev/null +++ b/Common/Math/Statistics.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +// Very simple stat for convenience. Keeps track of min, max, smoothed. 
+struct SimpleStat { + SimpleStat(const char *name) : name_(name) { Reset(); } + + void Update(double value) { + value_ = value; + if (min_ == INFINITY) { + smoothed_ = value; + } else { + // TODO: Make factor adjustable? + smoothed_ = 0.99 * smoothed_ + 0.01 * value; + } + if (value < min_) { + min_ = value; + } + if (value > max_) { + max_ = value; + } + } + + void Reset() { + value_ = 0.0; + smoothed_ = 0.0; // doesn't really need init + min_ = INFINITY; + max_ = -INFINITY; + } + + void Format(char *buffer, size_t sz); + +private: + const char *name_; + + // These are initialized in Reset(). + double value_; + double min_; + double max_; + double smoothed_; +}; diff --git a/Common/Math/math_util.cpp b/Common/Math/math_util.cpp index 9b1a59a755..6e72791f87 100644 --- a/Common/Math/math_util.cpp +++ b/Common/Math/math_util.cpp @@ -1,44 +1,5 @@ #include "Common/Math/math_util.h" -#include -// QNX can only use RunFast mode and it is already the default. -#if defined(__ARM_ARCH_7A__) -// Enables 'RunFast' VFP mode. -void EnableFZ() { - int x; - asm( - "fmrx %[result],FPSCR \r\n" - "orr %[result],%[result],#16777216 \r\n" - "fmxr FPSCR,%[result]" - :[result] "=r" (x) : : - ); - //printf("ARM FPSCR: %08x\n",x); -} +#include -// New fastmode code from: http://pandorawiki.org/Floating_Point_Optimization -// These settings turbocharge the slow VFP unit on Cortex-A8 based chips by setting -// restrictions that permit running VFP instructions on the NEON unit. -// Denormal flush-to-zero, for example. -void FPU_SetFastMode() { - static const unsigned int x = 0x04086060; - static const unsigned int y = 0x03000000; - int r; - asm volatile ( - "fmrx %0, fpscr \n\t" //r0 = FPSCR - "and %0, %0, %1 \n\t" //r0 = r0 & 0x04086060 - "orr %0, %0, %2 \n\t" //r0 = r0 | 0x03000000 - "fmxr fpscr, %0 \n\t" //FPSCR = r0 - : "=r"(r) - : "r"(x), "r"(y) - ); -} - -#else - -void EnableFZ() { - // TODO -} - -void FPU_SetFastMode() {} - -#endif +// Could delete this file, but might find use again. 
diff --git a/Common/Math/math_util.h b/Common/Math/math_util.h index fd47662b54..8f8962cab4 100644 --- a/Common/Math/math_util.h +++ b/Common/Math/math_util.h @@ -189,12 +189,3 @@ inline uint16_t ShrinkToHalf(float full) { FP16 fp = float_to_half_fast3(fp32); return fp.u; } - -// FPU control. -void EnableFZ(); - -// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode -// where they can schedule VFP instructions on the NEON unit (these implementations have -// very slow VFP units). -// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html -void FPU_SetFastMode(); diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index c36d39a6f1..3d14e9c1aa 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1215,7 +1215,7 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer } if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) { - BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "Blit_CopyFramebufferForColorTexture"); + BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture"); } } diff --git a/Qt/QtMain.cpp b/Qt/QtMain.cpp index bba1f1ae2b..c6381a112d 100644 --- a/Qt/QtMain.cpp +++ b/Qt/QtMain.cpp @@ -316,7 +316,6 @@ static int mainInternal(QApplication &a) { emugl->resize(pixel_xres, pixel_yres); emugl->showFullScreen(); #endif - EnableFZ(); // Disable screensaver #if defined(QT_HAS_SYSTEMINFO) QScreenSaver ssObject(emugl); diff --git a/SDL/SDLMain.cpp b/SDL/SDLMain.cpp index da966cabae..0a6daa5361 100644 --- a/SDL/SDLMain.cpp +++ b/SDL/SDLMain.cpp @@ -807,7 +807,6 @@ int main(int argc, char *argv[]) { } else { joystick = nullptr; } - EnableFZ(); int framecount = 0; bool mouseDown = false; diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index c29d8e67c6..bcf5a47a48 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ 
-344,6 +344,8 @@ void EmuScreen::bootGame(const Path &filename) { loadingViewColor_->Divert(0xFFFFFFFF, 0.75f); loadingViewVisible_->Divert(UI::V_VISIBLE, 0.75f); + + screenManager()->getDrawContext()->ResetStats(); } void EmuScreen::bootComplete() { diff --git a/UWP/CommonUWP/CommonUWP.vcxproj b/UWP/CommonUWP/CommonUWP.vcxproj index 5c03d355c9..09e9173e41 100644 --- a/UWP/CommonUWP/CommonUWP.vcxproj +++ b/UWP/CommonUWP/CommonUWP.vcxproj @@ -387,6 +387,7 @@ + @@ -519,6 +520,7 @@ + @@ -639,4 +641,4 @@ - + \ No newline at end of file diff --git a/UWP/CommonUWP/CommonUWP.vcxproj.filters b/UWP/CommonUWP/CommonUWP.vcxproj.filters index 7ab8b012c7..23de400ce6 100644 --- a/UWP/CommonUWP/CommonUWP.vcxproj.filters +++ b/UWP/CommonUWP/CommonUWP.vcxproj.filters @@ -372,6 +372,9 @@ File + + Math + @@ -688,11 +691,9 @@ File - - - - ext\libpng17 - + + Math + @@ -701,5 +702,6 @@ Math\lin + - + \ No newline at end of file diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 42cae5cb28..305caf67a5 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -164,6 +164,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/Common/Input/InputState.cpp \ $(SRC)/Common/Math/fast/fast_matrix.c \ $(SRC)/Common/Math/math_util.cpp \ + $(SRC)/Common/Math/Statistics.cpp \ $(SRC)/Common/Math/curves.cpp \ $(SRC)/Common/Math/expression_parser.cpp \ $(SRC)/Common/Math/lin/vec3.cpp.arm \ diff --git a/ios/ViewController.mm b/ios/ViewController.mm index c90c30a22a..63d241cf46 100644 --- a/ios/ViewController.mm +++ b/ios/ViewController.mm @@ -807,6 +807,3 @@ void bindDefaultFBO() { [sharedViewController bindDefaultFBO]; } - -void EnableFZ(){}; -void DisableFZ(){}; diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 56c748b52a..375792b296 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -271,6 +271,7 @@ SOURCES_CXX += \ $(COMMONDIR)/Math/curves.cpp \ $(COMMONDIR)/Math/expression_parser.cpp \ $(COMMONDIR)/Math/math_util.cpp \ + 
$(COMMONDIR)/Math/Statistics.cpp \ $(COMMONDIR)/Math/lin/vec3.cpp \ $(COMMONDIR)/Math/lin/matrix4x4.cpp \ $(COMMONDIR)/Net/HTTPClient.cpp \