mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
Merge pull request #16196 from hrydgard/improved-render-stats
Improved stats in the Vulkan GPU profiler
This commit is contained in:
commit
d56bdcb81e
@ -650,6 +650,8 @@ add_library(Common STATIC
|
||||
Common/Math/lin/vec3.h
|
||||
Common/Math/math_util.cpp
|
||||
Common/Math/math_util.h
|
||||
Common/Math/Statistics.h
|
||||
Common/Math/Statistics.cpp
|
||||
Common/Net/HTTPClient.cpp
|
||||
Common/Net/HTTPClient.h
|
||||
Common/Net/HTTPHeaders.cpp
|
||||
|
@ -466,6 +466,7 @@
|
||||
<ClInclude Include="Math\lin\matrix4x4.h" />
|
||||
<ClInclude Include="Math\lin\vec3.h" />
|
||||
<ClInclude Include="Math\math_util.h" />
|
||||
<ClInclude Include="Math\Statistics.h" />
|
||||
<ClInclude Include="Net\NetBuffer.h" />
|
||||
<ClInclude Include="Net\HTTPClient.h" />
|
||||
<ClInclude Include="Net\HTTPHeaders.h" />
|
||||
@ -886,6 +887,7 @@
|
||||
<ClCompile Include="Math\lin\matrix4x4.cpp" />
|
||||
<ClCompile Include="Math\lin\vec3.cpp" />
|
||||
<ClCompile Include="Math\math_util.cpp" />
|
||||
<ClCompile Include="Math\Statistics.cpp" />
|
||||
<ClCompile Include="Net\NetBuffer.cpp" />
|
||||
<ClCompile Include="Net\HTTPClient.cpp" />
|
||||
<ClCompile Include="Net\HTTPHeaders.cpp" />
|
||||
|
@ -422,6 +422,9 @@
|
||||
<ClInclude Include="GPU\Vulkan\VulkanFrameData.h">
|
||||
<Filter>GPU\Vulkan</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Math\Statistics.h">
|
||||
<Filter>Math</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ABI.cpp" />
|
||||
@ -797,6 +800,9 @@
|
||||
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp">
|
||||
<Filter>GPU\Vulkan</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Math\Statistics.cpp">
|
||||
<Filter>Math</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="Crypto">
|
||||
|
@ -515,6 +515,8 @@ void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
|
||||
}
|
||||
MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
|
||||
steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType);
|
||||
steps[i]->render.numDraws += steps[j]->render.numDraws;
|
||||
steps[i]->render.numReads += steps[j]->render.numReads;
|
||||
// Cheaply skip the first step.
|
||||
steps[j]->stepType = VKRStepType::RENDER_SKIP;
|
||||
break;
|
||||
@ -936,6 +938,8 @@ void VulkanQueueRunner::ApplyRenderPassMerge(std::vector<VKRStep *> &steps) {
|
||||
// So we don't consider it for other things, maybe doesn't matter.
|
||||
src->dependencies.clear();
|
||||
src->stepType = VKRStepType::RENDER_SKIP;
|
||||
dst->render.numDraws += src->render.numDraws;
|
||||
dst->render.numReads += src->render.numReads;
|
||||
dst->render.pipelineFlags |= src->render.pipelineFlags;
|
||||
dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType);
|
||||
};
|
||||
|
@ -327,7 +327,12 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
|
||||
img.tag = tag ? tag : "N/A";
|
||||
}
|
||||
|
||||
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
|
||||
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
|
||||
: vulkan_(vulkan), queueRunner_(vulkan),
|
||||
initTimeMs_("initTimeMs"),
|
||||
totalGPUTimeMs_("totalGPUTimeMs"),
|
||||
renderCPUTimeMs_("renderCPUTimeMs")
|
||||
{
|
||||
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
|
||||
|
||||
frameDataShared_.Init(vulkan);
|
||||
@ -579,14 +584,25 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
|
||||
std::stringstream str;
|
||||
|
||||
char line[256];
|
||||
snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
|
||||
totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
|
||||
totalGPUTimeMs_.Format(line, sizeof(line));
|
||||
str << line;
|
||||
snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
|
||||
renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
|
||||
renderCPUTimeMs_.Format(line, sizeof(line));
|
||||
str << line;
|
||||
for (int i = 0; i < numQueries - 1; i++) {
|
||||
uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
|
||||
double milliseconds = (double)diff * timestampConversionFactor;
|
||||
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
|
||||
|
||||
// Can't use SimpleStat for these very easily since these are dynamic per frame.
|
||||
// Only the first one is static, the initCmd.
|
||||
// Could try some hashtable tracking for the rest, later.
|
||||
if (i == 0) {
|
||||
initTimeMs_.Update(milliseconds);
|
||||
initTimeMs_.Format(line, sizeof(line));
|
||||
} else {
|
||||
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
|
||||
}
|
||||
str << line;
|
||||
}
|
||||
frameData.profile.profileSummary = str.str();
|
||||
@ -1344,3 +1360,9 @@ void VulkanRenderManager::FlushSync() {
|
||||
frameData.syncDone = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Clears the accumulated profiler statistics by calling Reset() on each of the
// three SimpleStat members: init time, total GPU time and render CPU time.
void VulkanRenderManager::ResetStats() {
|
||||
initTimeMs_.Reset();
|
||||
totalGPUTimeMs_.Reset();
|
||||
renderCPUTimeMs_.Reset();
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <thread>
|
||||
#include <queue>
|
||||
|
||||
#include "Common/Math/Statistics.h"
|
||||
#include "Common/Thread/Promise.h"
|
||||
#include "Common/System/Display.h"
|
||||
#include "Common/GPU/Vulkan/VulkanContext.h"
|
||||
@ -465,6 +466,8 @@ public:
|
||||
return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
|
||||
}
|
||||
|
||||
void ResetStats();
|
||||
|
||||
private:
|
||||
void EndCurRenderStep();
|
||||
|
||||
@ -535,4 +538,9 @@ private:
|
||||
|
||||
// pipelines to check and possibly create at the end of the current render pass.
|
||||
std::vector<VKRGraphicsPipeline *> pipelinesToCheck_;
|
||||
|
||||
// For nicer output in the little internal GPU profiler.
|
||||
SimpleStat initTimeMs_;
|
||||
SimpleStat totalGPUTimeMs_;
|
||||
SimpleStat renderCPUTimeMs_;
|
||||
};
|
||||
|
@ -455,6 +455,10 @@ public:
|
||||
|
||||
void FlushState() override {}
|
||||
|
||||
// Backend override: forwards the stats reset to the render manager.
void ResetStats() override {
|
||||
renderManager_.ResetStats();
|
||||
}
|
||||
|
||||
std::string GetInfoString(InfoField info) const override {
|
||||
// TODO: Make these actually query the right information
|
||||
switch (info) {
|
||||
|
@ -735,6 +735,9 @@ public:
|
||||
// Flush state like scissors etc so the caller can do its own custom drawing.
|
||||
virtual void FlushState() {}
|
||||
|
||||
// This is called when we launch a new game, so any collected internal stats in the backends don't carry over.
|
||||
virtual void ResetStats() {}
|
||||
|
||||
virtual int GetCurrentStepId() const = 0;
|
||||
|
||||
protected:
|
||||
|
11
Common/Math/Statistics.cpp
Normal file
11
Common/Math/Statistics.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include <cstdio>
|
||||
|
||||
#include "Common/Math/Statistics.h"
|
||||
|
||||
// Writes a single newline-terminated summary line for this stat into `buffer`,
// passing `sz` to snprintf as the buffer size.
// Output is "<name>: N/A" while min_ still holds the INFINITY sentinel, otherwise
// "<name>: value (min..max, avg smoothed)" with two decimals per number.
void SimpleStat::Format(char *buffer, size_t sz) {
|
||||
// min_ == INFINITY means no sample has lowered the minimum yet, so there is nothing to print.
if (min_ == INFINITY) {
|
||||
snprintf(buffer, sz, "%s: N/A\n", name_);
|
||||
} else {
|
||||
snprintf(buffer, sz, "%s: %0.2f (%0.2f..%0.2f, avg %0.2f)\n", name_, value_, min_, max_, smoothed_);
|
||||
}
|
||||
}
|
42
Common/Math/Statistics.h
Normal file
42
Common/Math/Statistics.h
Normal file
@ -0,0 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
|
||||
// Very simple stat for convenience. Keeps track of the latest value, the running
// min and max, and an exponentially smoothed average, all under a display name.
// The name pointer is stored as-is (not copied), so the string it points to has to
// stay valid for as long as the stat is used.
|
||||
struct SimpleStat {
|
||||
// Stores the name pointer and puts all counters into their initial state.
SimpleStat(const char *name) : name_(name) { Reset(); }
|
||||
|
||||
// Records a new sample: updates the latest value, the smoothed average and min/max.
void Update(double value) {
|
||||
value_ = value;
|
||||
// min_ is still the INFINITY sentinel: seed the smoothed value directly
// instead of blending with the 0.0 left by Reset().
if (min_ == INFINITY) {
|
||||
smoothed_ = value;
|
||||
} else {
|
||||
// TODO: Make factor adjustable?
|
||||
// Exponential moving average: each new sample contributes 1%.
smoothed_ = 0.99 * smoothed_ + 0.01 * value;
|
||||
}
|
||||
if (value < min_) {
|
||||
min_ = value;
|
||||
}
|
||||
if (value > max_) {
|
||||
max_ = value;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the stat to its initial state. min_/max_ are set to +/-INFINITY so
// that the next sample compared against them replaces both.
void Reset() {
|
||||
value_ = 0.0;
|
||||
smoothed_ = 0.0; // doesn't really need init, Update() overwrites it while min_ is still INFINITY
|
||||
min_ = INFINITY;
|
||||
|
||||
max_ = -INFINITY;
|
||||
}
|
||||
|
||||
// Writes a formatted summary of this stat into buffer (sz is the buffer size).
// Only declared here; the body lives outside this struct.
void Format(char *buffer, size_t sz);
|
||||
|
||||
private:
|
||||
// Display name; not owned.
const char *name_;
|
||||
|
||||
// These are initialized in Reset().
|
||||
double value_;
|
||||
double min_;
|
||||
|
||||
double max_;
|
||||
double smoothed_;
|
||||
};
|
@ -1,44 +1,5 @@
|
||||
#include "Common/Math/math_util.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
// QNX can only use RunFast mode and it is already the default.
|
||||
#if defined(__ARM_ARCH_7A__)
|
||||
// Enables 'RunFast' VFP mode.
|
||||
void EnableFZ() {
|
||||
int x;
|
||||
asm(
|
||||
"fmrx %[result],FPSCR \r\n"
|
||||
"orr %[result],%[result],#16777216 \r\n"
|
||||
"fmxr FPSCR,%[result]"
|
||||
:[result] "=r" (x) : :
|
||||
);
|
||||
//printf("ARM FPSCR: %08x\n",x);
|
||||
}
|
||||
#include <cstdlib>
|
||||
|
||||
// New fastmode code from: http://pandorawiki.org/Floating_Point_Optimization
|
||||
// These settings turbocharge the slow VFP unit on Cortex-A8 based chips by setting
|
||||
// restrictions that permit running VFP instructions on the NEON unit.
|
||||
// Denormal flush-to-zero, for example.
|
||||
void FPU_SetFastMode() {
|
||||
static const unsigned int x = 0x04086060;
|
||||
static const unsigned int y = 0x03000000;
|
||||
int r;
|
||||
asm volatile (
|
||||
"fmrx %0, fpscr \n\t" //r0 = FPSCR
|
||||
"and %0, %0, %1 \n\t" //r0 = r0 & 0x04086060
|
||||
"orr %0, %0, %2 \n\t" //r0 = r0 | 0x03000000
|
||||
"fmxr fpscr, %0 \n\t" //FPSCR = r0
|
||||
: "=r"(r)
|
||||
: "r"(x), "r"(y)
|
||||
);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void EnableFZ() {
|
||||
// TODO
|
||||
}
|
||||
|
||||
void FPU_SetFastMode() {}
|
||||
|
||||
#endif
|
||||
// Could delete this file, but might find use again.
|
||||
|
@ -189,12 +189,3 @@ inline uint16_t ShrinkToHalf(float full) {
|
||||
FP16 fp = float_to_half_fast3(fp32);
|
||||
return fp.u;
|
||||
}
|
||||
|
||||
// FPU control.
|
||||
void EnableFZ();
|
||||
|
||||
// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode
|
||||
// where they can schedule VFP instructions on the NEON unit (these implementations have
|
||||
// very slow VFP units).
|
||||
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
|
||||
void FPU_SetFastMode();
|
||||
|
@ -1215,7 +1215,7 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer
|
||||
}
|
||||
|
||||
if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
|
||||
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "Blit_CopyFramebufferForColorTexture");
|
||||
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -316,7 +316,6 @@ static int mainInternal(QApplication &a) {
|
||||
emugl->resize(pixel_xres, pixel_yres);
|
||||
emugl->showFullScreen();
|
||||
#endif
|
||||
EnableFZ();
|
||||
// Disable screensaver
|
||||
#if defined(QT_HAS_SYSTEMINFO)
|
||||
QScreenSaver ssObject(emugl);
|
||||
|
@ -807,7 +807,6 @@ int main(int argc, char *argv[]) {
|
||||
} else {
|
||||
joystick = nullptr;
|
||||
}
|
||||
EnableFZ();
|
||||
|
||||
int framecount = 0;
|
||||
bool mouseDown = false;
|
||||
|
@ -344,6 +344,8 @@ void EmuScreen::bootGame(const Path &filename) {
|
||||
|
||||
loadingViewColor_->Divert(0xFFFFFFFF, 0.75f);
|
||||
loadingViewVisible_->Divert(UI::V_VISIBLE, 0.75f);
|
||||
|
||||
screenManager()->getDrawContext()->ResetStats();
|
||||
}
|
||||
|
||||
void EmuScreen::bootComplete() {
|
||||
|
@ -387,6 +387,7 @@
|
||||
<ClInclude Include="..\..\Common\BitSet.h" />
|
||||
<ClInclude Include="..\..\Common\Buffer.h" />
|
||||
<ClInclude Include="..\..\Common\File\AndroidStorage.h" />
|
||||
<ClInclude Include="..\..\Common\Math\Statistics.h" />
|
||||
<ClInclude Include="..\..\Common\Net\NetBuffer.h" />
|
||||
<ClInclude Include="..\..\Common\Data\Collections\ConstMap.h" />
|
||||
<ClInclude Include="..\..\Common\Data\Collections\FixedSizeQueue.h" />
|
||||
@ -519,6 +520,7 @@
|
||||
<ClCompile Include="..\..\Common\ArmEmitter.cpp" />
|
||||
<ClCompile Include="..\..\Common\Buffer.cpp" />
|
||||
<ClCompile Include="..\..\Common\File\AndroidStorage.cpp" />
|
||||
<ClCompile Include="..\..\Common\Math\Statistics.cpp" />
|
||||
<ClCompile Include="..\..\Common\Net\NetBuffer.cpp" />
|
||||
<ClCompile Include="..\..\Common\Data\Color\RGBAUtil.cpp" />
|
||||
<ClCompile Include="..\..\Common\Data\Convert\SmallDataConvert.cpp" />
|
||||
@ -639,4 +641,4 @@
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
@ -372,6 +372,9 @@
|
||||
<ClCompile Include="..\..\Common\File\AndroidStorage.cpp">
|
||||
<Filter>File</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\Common\Math\Statistics.cpp">
|
||||
<Filter>Math</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="targetver.h" />
|
||||
@ -688,11 +691,9 @@
|
||||
<ClInclude Include="..\..\Common\File\AndroidStorage.h">
|
||||
<Filter>File</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="..\..\ext\libpng17\CMakeLists.txt">
|
||||
<Filter>ext\libpng17</Filter>
|
||||
</Text>
|
||||
<ClInclude Include="..\..\Common\Math\Statistics.h">
|
||||
<Filter>Math</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\Common\Math\fast\fast_matrix_neon.S">
|
||||
@ -701,5 +702,6 @@
|
||||
<None Include="..\..\Common\Math\lin\matrix_neon.s">
|
||||
<Filter>Math\lin</Filter>
|
||||
</None>
|
||||
<None Include="..\..\ext\libpng17\CMakeLists.txt" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
@ -164,6 +164,7 @@ EXEC_AND_LIB_FILES := \
|
||||
$(SRC)/Common/Input/InputState.cpp \
|
||||
$(SRC)/Common/Math/fast/fast_matrix.c \
|
||||
$(SRC)/Common/Math/math_util.cpp \
|
||||
$(SRC)/Common/Math/Statistics.cpp \
|
||||
$(SRC)/Common/Math/curves.cpp \
|
||||
$(SRC)/Common/Math/expression_parser.cpp \
|
||||
$(SRC)/Common/Math/lin/vec3.cpp.arm \
|
||||
|
@ -807,6 +807,3 @@ void bindDefaultFBO()
|
||||
{
|
||||
[sharedViewController bindDefaultFBO];
|
||||
}
|
||||
|
||||
void EnableFZ(){};
|
||||
void DisableFZ(){};
|
||||
|
@ -271,6 +271,7 @@ SOURCES_CXX += \
|
||||
$(COMMONDIR)/Math/curves.cpp \
|
||||
$(COMMONDIR)/Math/expression_parser.cpp \
|
||||
$(COMMONDIR)/Math/math_util.cpp \
|
||||
$(COMMONDIR)/Math/Statistics.cpp \
|
||||
$(COMMONDIR)/Math/lin/vec3.cpp \
|
||||
$(COMMONDIR)/Math/lin/matrix4x4.cpp \
|
||||
$(COMMONDIR)/Net/HTTPClient.cpp \
|
||||
|
Loading…
Reference in New Issue
Block a user