diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6cb38fb017..28ce455b8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -650,6 +650,8 @@ add_library(Common STATIC
Common/Math/lin/vec3.h
Common/Math/math_util.cpp
Common/Math/math_util.h
+ Common/Math/Statistics.h
+ Common/Math/Statistics.cpp
Common/Net/HTTPClient.cpp
Common/Net/HTTPClient.h
Common/Net/HTTPHeaders.cpp
diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj
index 901006a0d9..aa0f9bb1e7 100644
--- a/Common/Common.vcxproj
+++ b/Common/Common.vcxproj
@@ -466,6 +466,7 @@
+
@@ -886,6 +887,7 @@
+
diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters
index 991b03a71f..0cb9e79423 100644
--- a/Common/Common.vcxproj.filters
+++ b/Common/Common.vcxproj.filters
@@ -422,6 +422,9 @@
GPU\Vulkan
+
+ Math
+
@@ -797,6 +800,9 @@
GPU\Vulkan
+
+ Math
+
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
index c1da294a0f..1696487d5c 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -515,6 +515,8 @@ void VulkanQueueRunner::PreprocessSteps(std::vector &steps) {
}
MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType);
+ steps[i]->render.numDraws += steps[j]->render.numDraws;
+ steps[i]->render.numReads += steps[j]->render.numReads;
// Cheaply skip the first step.
steps[j]->stepType = VKRStepType::RENDER_SKIP;
break;
@@ -936,6 +938,8 @@ void VulkanQueueRunner::ApplyRenderPassMerge(std::vector &steps) {
// So we don't consider it for other things, maybe doesn't matter.
src->dependencies.clear();
src->stepType = VKRStepType::RENDER_SKIP;
+ dst->render.numDraws += src->render.numDraws;
+ dst->render.numReads += src->render.numReads;
dst->render.pipelineFlags |= src->render.pipelineFlags;
dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType);
};
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index 44a4455d98..7c7d8418c6 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -327,7 +327,12 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
img.tag = tag ? tag : "N/A";
}
-VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
+VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
+ : vulkan_(vulkan), queueRunner_(vulkan),
+ initTimeMs_("initTimeMs"),
+ totalGPUTimeMs_("totalGPUTimeMs"),
+ renderCPUTimeMs_("renderCPUTimeMs")
+{
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
frameDataShared_.Init(vulkan);
@@ -579,14 +584,25 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
std::stringstream str;
char line[256];
- snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
+ totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
+ totalGPUTimeMs_.Format(line, sizeof(line));
str << line;
- snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
+ renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
+ renderCPUTimeMs_.Format(line, sizeof(line));
str << line;
for (int i = 0; i < numQueries - 1; i++) {
uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
double milliseconds = (double)diff * timestampConversionFactor;
- snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
+
+ // Can't use SimpleStat for these very easily since these are dynamic per frame.
+ // Only the first one is static, the initCmd.
+ // Could try some hashtable tracking for the rest, later.
+ if (i == 0) {
+ initTimeMs_.Update(milliseconds);
+ initTimeMs_.Format(line, sizeof(line));
+ } else {
+ snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
+ }
str << line;
}
frameData.profile.profileSummary = str.str();
@@ -1344,3 +1360,9 @@ void VulkanRenderManager::FlushSync() {
frameData.syncDone = false;
}
}
+
+// Resets every SimpleStat that feeds the profile summary text.
+void VulkanRenderManager::ResetStats() {
+	SimpleStat *const profilerStats[] = { &initTimeMs_, &totalGPUTimeMs_, &renderCPUTimeMs_ };
+	for (SimpleStat *stat : profilerStats) {
+		stat->Reset();
+	}
+}
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h
index e1f66c29e2..7ba98b485c 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.h
+++ b/Common/GPU/Vulkan/VulkanRenderManager.h
@@ -12,6 +12,7 @@
#include
#include
+#include "Common/Math/Statistics.h"
#include "Common/Thread/Promise.h"
#include "Common/System/Display.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
@@ -465,6 +466,8 @@ public:
return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
}
+ void ResetStats();
+
private:
void EndCurRenderStep();
@@ -535,4 +538,9 @@ private:
// pipelines to check and possibly create at the end of the current render pass.
std::vector pipelinesToCheck_;
+
+ // For nicer output in the little internal GPU profiler.
+ SimpleStat initTimeMs_;
+ SimpleStat totalGPUTimeMs_;
+ SimpleStat renderCPUTimeMs_;
};
diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp
index 6c342f8e9e..dc1b806011 100644
--- a/Common/GPU/Vulkan/thin3d_vulkan.cpp
+++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp
@@ -455,6 +455,10 @@ public:
void FlushState() override {}
+ // Forwards the request to the render manager's own ResetStats().
+ void ResetStats() override {
+ renderManager_.ResetStats();
+ }
+
std::string GetInfoString(InfoField info) const override {
// TODO: Make these actually query the right information
switch (info) {
diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h
index 3e482687a3..132c0e02f9 100644
--- a/Common/GPU/thin3d.h
+++ b/Common/GPU/thin3d.h
@@ -735,6 +735,9 @@ public:
// Flush state like scissors etc so the caller can do its own custom drawing.
virtual void FlushState() {}
+ // This is called when we launch a new game, so any collected internal stats in the backends don't carry over.
+ // The default implementation does nothing; a backend that keeps stats overrides it.
+ virtual void ResetStats() {}
+
virtual int GetCurrentStepId() const = 0;
protected:
diff --git a/Common/Math/Statistics.cpp b/Common/Math/Statistics.cpp
new file mode 100644
index 0000000000..c05dd84e85
--- /dev/null
+++ b/Common/Math/Statistics.cpp
@@ -0,0 +1,11 @@
+#include
+
+#include "Common/Math/Statistics.h"
+
+// Writes a single newline-terminated summary line for this stat into buffer,
+// never using more than sz bytes (snprintf semantics).
+void SimpleStat::Format(char *buffer, size_t sz) {
+	// Reset() leaves min_ at INFINITY; it only moves once a smaller sample arrives.
+	const bool hasSamples = (min_ != INFINITY);
+	if (hasSamples) {
+		snprintf(buffer, sz, "%s: %0.2f (%0.2f..%0.2f, avg %0.2f)\n", name_, value_, min_, max_, smoothed_);
+		return;
+	}
+	snprintf(buffer, sz, "%s: N/A\n", name_);
+}
diff --git a/Common/Math/Statistics.h b/Common/Math/Statistics.h
new file mode 100644
index 0000000000..0b03d1d420
--- /dev/null
+++ b/Common/Math/Statistics.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include
+
+// Minimal running statistic for convenience: remembers the latest sample along
+// with the minimum, maximum and a smoothed average since the last Reset().
+struct SimpleStat {
+	// The name pointer is stored as-is (not copied), so it must outlive the stat.
+	SimpleStat(const char *name) : name_(name) { Reset(); }
+
+	// Records a new sample and refreshes min, max and the smoothed average.
+	void Update(double value) {
+		// min_ == INFINITY is the "nothing recorded yet" marker left by Reset();
+		// in that state the smoothed average is seeded directly from the sample.
+		const bool firstSample = (min_ == INFINITY);
+		// TODO: Make factor adjustable?
+		smoothed_ = firstSample ? value : 0.99 * smoothed_ + 0.01 * value;
+		min_ = (value < min_) ? value : min_;
+		max_ = (value > max_) ? value : max_;
+		value_ = value;
+	}
+
+	// Returns the stat to its "nothing recorded yet" state.
+	void Reset() {
+		min_ = INFINITY;
+		max_ = -INFINITY;
+		value_ = 0.0;
+		smoothed_ = 0.0;  // doesn't really need init, Update() seeds it on the first sample
+	}
+
+	// Defined in Statistics.cpp: prints "name: N/A" or the current/min/max/avg line.
+	void Format(char *buffer, size_t sz);
+
+private:
+	const char *name_;
+
+	// Everything below is (re)initialized by Reset().
+	double value_;
+	double min_;
+	double max_;
+	double smoothed_;
+};
diff --git a/Common/Math/math_util.cpp b/Common/Math/math_util.cpp
index 9b1a59a755..6e72791f87 100644
--- a/Common/Math/math_util.cpp
+++ b/Common/Math/math_util.cpp
@@ -1,44 +1,5 @@
#include "Common/Math/math_util.h"
-#include
-// QNX can only use RunFast mode and it is already the default.
-#if defined(__ARM_ARCH_7A__)
-// Enables 'RunFast' VFP mode.
-void EnableFZ() {
- int x;
- asm(
- "fmrx %[result],FPSCR \r\n"
- "orr %[result],%[result],#16777216 \r\n"
- "fmxr FPSCR,%[result]"
- :[result] "=r" (x) : :
- );
- //printf("ARM FPSCR: %08x\n",x);
-}
+#include
-// New fastmode code from: http://pandorawiki.org/Floating_Point_Optimization
-// These settings turbocharge the slow VFP unit on Cortex-A8 based chips by setting
-// restrictions that permit running VFP instructions on the NEON unit.
-// Denormal flush-to-zero, for example.
-void FPU_SetFastMode() {
- static const unsigned int x = 0x04086060;
- static const unsigned int y = 0x03000000;
- int r;
- asm volatile (
- "fmrx %0, fpscr \n\t" //r0 = FPSCR
- "and %0, %0, %1 \n\t" //r0 = r0 & 0x04086060
- "orr %0, %0, %2 \n\t" //r0 = r0 | 0x03000000
- "fmxr fpscr, %0 \n\t" //FPSCR = r0
- : "=r"(r)
- : "r"(x), "r"(y)
- );
-}
-
-#else
-
-void EnableFZ() {
- // TODO
-}
-
-void FPU_SetFastMode() {}
-
-#endif
+// Could delete this file, but might find use again.
diff --git a/Common/Math/math_util.h b/Common/Math/math_util.h
index fd47662b54..8f8962cab4 100644
--- a/Common/Math/math_util.h
+++ b/Common/Math/math_util.h
@@ -189,12 +189,3 @@ inline uint16_t ShrinkToHalf(float full) {
FP16 fp = float_to_half_fast3(fp32);
return fp.u;
}
-
-// FPU control.
-void EnableFZ();
-
-// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode
-// where they can schedule VFP instructions on the NEON unit (these implementations have
-// very slow VFP units).
-// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
-void FPU_SetFastMode();
diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp
index c36d39a6f1..3d14e9c1aa 100644
--- a/GPU/Common/FramebufferManagerCommon.cpp
+++ b/GPU/Common/FramebufferManagerCommon.cpp
@@ -1215,7 +1215,7 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer
}
if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
- BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "Blit_CopyFramebufferForColorTexture");
+ BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
}
diff --git a/Qt/QtMain.cpp b/Qt/QtMain.cpp
index bba1f1ae2b..c6381a112d 100644
--- a/Qt/QtMain.cpp
+++ b/Qt/QtMain.cpp
@@ -316,7 +316,6 @@ static int mainInternal(QApplication &a) {
emugl->resize(pixel_xres, pixel_yres);
emugl->showFullScreen();
#endif
- EnableFZ();
// Disable screensaver
#if defined(QT_HAS_SYSTEMINFO)
QScreenSaver ssObject(emugl);
diff --git a/SDL/SDLMain.cpp b/SDL/SDLMain.cpp
index da966cabae..0a6daa5361 100644
--- a/SDL/SDLMain.cpp
+++ b/SDL/SDLMain.cpp
@@ -807,7 +807,6 @@ int main(int argc, char *argv[]) {
} else {
joystick = nullptr;
}
- EnableFZ();
int framecount = 0;
bool mouseDown = false;
diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp
index c29d8e67c6..bcf5a47a48 100644
--- a/UI/EmuScreen.cpp
+++ b/UI/EmuScreen.cpp
@@ -344,6 +344,8 @@ void EmuScreen::bootGame(const Path &filename) {
loadingViewColor_->Divert(0xFFFFFFFF, 0.75f);
loadingViewVisible_->Divert(UI::V_VISIBLE, 0.75f);
+
+ screenManager()->getDrawContext()->ResetStats();
}
void EmuScreen::bootComplete() {
diff --git a/UWP/CommonUWP/CommonUWP.vcxproj b/UWP/CommonUWP/CommonUWP.vcxproj
index 5c03d355c9..09e9173e41 100644
--- a/UWP/CommonUWP/CommonUWP.vcxproj
+++ b/UWP/CommonUWP/CommonUWP.vcxproj
@@ -387,6 +387,7 @@
+
@@ -519,6 +520,7 @@
+
@@ -639,4 +641,4 @@
-
+
\ No newline at end of file
diff --git a/UWP/CommonUWP/CommonUWP.vcxproj.filters b/UWP/CommonUWP/CommonUWP.vcxproj.filters
index 7ab8b012c7..23de400ce6 100644
--- a/UWP/CommonUWP/CommonUWP.vcxproj.filters
+++ b/UWP/CommonUWP/CommonUWP.vcxproj.filters
@@ -372,6 +372,9 @@
File
+
+ Math
+
@@ -688,11 +691,9 @@
File
-
-
-
- ext\libpng17
-
+
+ Math
+
@@ -701,5 +702,6 @@
Math\lin
+
-
+
\ No newline at end of file
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index 42cae5cb28..305caf67a5 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -164,6 +164,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/Common/Input/InputState.cpp \
$(SRC)/Common/Math/fast/fast_matrix.c \
$(SRC)/Common/Math/math_util.cpp \
+ $(SRC)/Common/Math/Statistics.cpp \
$(SRC)/Common/Math/curves.cpp \
$(SRC)/Common/Math/expression_parser.cpp \
$(SRC)/Common/Math/lin/vec3.cpp.arm \
diff --git a/ios/ViewController.mm b/ios/ViewController.mm
index c90c30a22a..63d241cf46 100644
--- a/ios/ViewController.mm
+++ b/ios/ViewController.mm
@@ -807,6 +807,3 @@ void bindDefaultFBO()
{
[sharedViewController bindDefaultFBO];
}
-
-void EnableFZ(){};
-void DisableFZ(){};
diff --git a/libretro/Makefile.common b/libretro/Makefile.common
index 56c748b52a..375792b296 100644
--- a/libretro/Makefile.common
+++ b/libretro/Makefile.common
@@ -271,6 +271,7 @@ SOURCES_CXX += \
$(COMMONDIR)/Math/curves.cpp \
$(COMMONDIR)/Math/expression_parser.cpp \
$(COMMONDIR)/Math/math_util.cpp \
+ $(COMMONDIR)/Math/Statistics.cpp \
$(COMMONDIR)/Math/lin/vec3.cpp \
$(COMMONDIR)/Math/lin/matrix4x4.cpp \
$(COMMONDIR)/Net/HTTPClient.cpp \