Merge pull request #16196 from hrydgard/improved-render-stats

Improved stats in the Vulkan GPU profiler
This commit is contained in:
Henrik Rydgård 2022-10-10 15:40:17 +02:00 committed by GitHub
commit d56bdcb81e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 126 additions and 67 deletions

View File

@ -650,6 +650,8 @@ add_library(Common STATIC
Common/Math/lin/vec3.h
Common/Math/math_util.cpp
Common/Math/math_util.h
Common/Math/Statistics.h
Common/Math/Statistics.cpp
Common/Net/HTTPClient.cpp
Common/Net/HTTPClient.h
Common/Net/HTTPHeaders.cpp

View File

@ -466,6 +466,7 @@
<ClInclude Include="Math\lin\matrix4x4.h" />
<ClInclude Include="Math\lin\vec3.h" />
<ClInclude Include="Math\math_util.h" />
<ClInclude Include="Math\Statistics.h" />
<ClInclude Include="Net\NetBuffer.h" />
<ClInclude Include="Net\HTTPClient.h" />
<ClInclude Include="Net\HTTPHeaders.h" />
@ -886,6 +887,7 @@
<ClCompile Include="Math\lin\matrix4x4.cpp" />
<ClCompile Include="Math\lin\vec3.cpp" />
<ClCompile Include="Math\math_util.cpp" />
<ClCompile Include="Math\Statistics.cpp" />
<ClCompile Include="Net\NetBuffer.cpp" />
<ClCompile Include="Net\HTTPClient.cpp" />
<ClCompile Include="Net\HTTPHeaders.cpp" />

View File

@ -422,6 +422,9 @@
<ClInclude Include="GPU\Vulkan\VulkanFrameData.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
<ClInclude Include="Math\Statistics.h">
<Filter>Math</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ABI.cpp" />
@ -797,6 +800,9 @@
<ClCompile Include="GPU\Vulkan\VulkanFrameData.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
<ClCompile Include="Math\Statistics.cpp">
<Filter>Math</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Crypto">

View File

@ -515,6 +515,8 @@ void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
}
MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType);
steps[i]->render.numDraws += steps[j]->render.numDraws;
steps[i]->render.numReads += steps[j]->render.numReads;
// Cheaply skip the first step.
steps[j]->stepType = VKRStepType::RENDER_SKIP;
break;
@ -936,6 +938,8 @@ void VulkanQueueRunner::ApplyRenderPassMerge(std::vector<VKRStep *> &steps) {
// So we don't consider it for other things, maybe doesn't matter.
src->dependencies.clear();
src->stepType = VKRStepType::RENDER_SKIP;
dst->render.numDraws += src->render.numDraws;
dst->render.numReads += src->render.numReads;
dst->render.pipelineFlags |= src->render.pipelineFlags;
dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType);
};

View File

@ -327,7 +327,12 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
img.tag = tag ? tag : "N/A";
}
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan), queueRunner_(vulkan) {
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
: vulkan_(vulkan), queueRunner_(vulkan),
initTimeMs_("initTimeMs"),
totalGPUTimeMs_("totalGPUTimeMs"),
renderCPUTimeMs_("renderCPUTimeMs")
{
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
frameDataShared_.Init(vulkan);
@ -579,14 +584,25 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
std::stringstream str;
char line[256];
snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
totalGPUTimeMs_.Format(line, sizeof(line));
str << line;
snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
renderCPUTimeMs_.Format(line, sizeof(line));
str << line;
for (int i = 0; i < numQueries - 1; i++) {
uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
double milliseconds = (double)diff * timestampConversionFactor;
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
// Can't use SimpleStat for these very easily since these are dynamic per frame.
// Only the first one is static, the initCmd.
// Could try some hashtable tracking for the rest, later.
if (i == 0) {
initTimeMs_.Update(milliseconds);
initTimeMs_.Format(line, sizeof(line));
} else {
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
}
str << line;
}
frameData.profile.profileSummary = str.str();
@ -1344,3 +1360,9 @@ void VulkanRenderManager::FlushSync() {
frameData.syncDone = false;
}
}
// Clears the three SimpleStat trackers that feed the internal GPU profiler output,
// putting each of them back into its freshly-constructed state.
void VulkanRenderManager::ResetStats() {
	// The stats don't depend on each other, so the order of the resets is irrelevant.
	renderCPUTimeMs_.Reset();
	totalGPUTimeMs_.Reset();
	initTimeMs_.Reset();
}

View File

@ -12,6 +12,7 @@
#include <thread>
#include <queue>
#include "Common/Math/Statistics.h"
#include "Common/Thread/Promise.h"
#include "Common/System/Display.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
@ -465,6 +466,8 @@ public:
return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
}
void ResetStats();
private:
void EndCurRenderStep();
@ -535,4 +538,9 @@ private:
// pipelines to check and possibly create at the end of the current render pass.
std::vector<VKRGraphicsPipeline *> pipelinesToCheck_;
// For nicer output in the little internal GPU profiler.
SimpleStat initTimeMs_;
SimpleStat totalGPUTimeMs_;
SimpleStat renderCPUTimeMs_;
};

View File

@ -455,6 +455,10 @@ public:
void FlushState() override {}
// Draw-context override: simply forwards the request to renderManager_.ResetStats().
void ResetStats() override {
renderManager_.ResetStats();
}
std::string GetInfoString(InfoField info) const override {
// TODO: Make these actually query the right information
switch (info) {

View File

@ -735,6 +735,9 @@ public:
// Flush state like scissors etc so the caller can do its own custom drawing.
virtual void FlushState() {}
// This is called when we launch a new game, so any collected internal stats in the backends don't carry over.
// The default implementation does nothing; a backend that collects stats overrides it.
virtual void ResetStats() {}
virtual int GetCurrentStepId() const = 0;
protected:

View File

@ -0,0 +1,11 @@
#include <cstdio>
#include "Common/Math/Statistics.h"
// Writes a one-line, newline-terminated summary of this stat into `buffer`,
// using snprintf so that at most `sz` bytes (including the terminator) are written.
// Layout: "<name>: <latest> (<min>..<max>, avg <smoothed>)".
void SimpleStat::Format(char *buffer, size_t sz) {
	// min_ still holding the INFINITY sentinel assigned by Reset() means no
	// sample has lowered it yet, so there is nothing meaningful to print.
	const bool nothingRecorded = (min_ == INFINITY);
	if (nothingRecorded) {
		snprintf(buffer, sz, "%s: N/A\n", name_);
		return;
	}

	snprintf(buffer, sz, "%s: %0.2f (%0.2f..%0.2f, avg %0.2f)\n", name_, value_, min_, max_, smoothed_);
}

42
Common/Math/Statistics.h Normal file
View File

@ -0,0 +1,42 @@
#pragma once
#include <cmath>
// Small convenience statistic. Remembers the most recent sample, the smallest and
// largest samples seen since the last Reset(), and an exponentially smoothed average.
struct SimpleStat {
	// `name` is stored as a raw pointer (not copied) and used as the label in Format().
	SimpleStat(const char *name) : name_(name) { Reset(); }

	// Feeds one new sample into the stat.
	void Update(double value) {
		value_ = value;

		// While min_ still holds the Reset() sentinel, seed the average with the
		// sample itself rather than blending it with the placeholder 0.0.
		const bool seedAverage = (min_ == INFINITY);
		// TODO: Make factor adjustable?
		smoothed_ = seedAverage ? value : (0.99 * smoothed_ + 0.01 * value);

		if (value < min_)
			min_ = value;
		if (value > max_)
			max_ = value;
	}

	// Returns the stat to its "nothing recorded" state.
	void Reset() {
		// Sentinels: the first sample compared against them replaces both extremes.
		min_ = INFINITY;
		max_ = -INFINITY;
		value_ = 0.0;
		// Doesn't strictly need initializing: Update() overwrites it on the first sample.
		smoothed_ = 0.0;
	}

	// Formats a one-line summary into `buffer` (at most `sz` bytes). Defined in Statistics.cpp.
	void Format(char *buffer, size_t sz);

private:
	const char *name_;

	// These are initialized in Reset().
	double value_;
	double min_;
	double max_;
	double smoothed_;
};

View File

@ -1,44 +1,5 @@
#include "Common/Math/math_util.h"
#include <stdlib.h>
// QNX can only use RunFast mode and it is already the default.
#if defined(__ARM_ARCH_7A__)
// Enables 'RunFast' VFP mode.
void EnableFZ() {
int x;
asm(
"fmrx %[result],FPSCR \r\n"
"orr %[result],%[result],#16777216 \r\n"
"fmxr FPSCR,%[result]"
:[result] "=r" (x) : :
);
//printf("ARM FPSCR: %08x\n",x);
}
#include <cstdlib>
// New fastmode code from: http://pandorawiki.org/Floating_Point_Optimization
// These settings turbocharge the slow VFP unit on Cortex-A8 based chips by setting
// restrictions that permit running VFP instructions on the NEON unit.
// Denormal flush-to-zero, for example.
void FPU_SetFastMode() {
static const unsigned int x = 0x04086060;
static const unsigned int y = 0x03000000;
int r;
asm volatile (
"fmrx %0, fpscr \n\t" //r0 = FPSCR
"and %0, %0, %1 \n\t" //r0 = r0 & 0x04086060
"orr %0, %0, %2 \n\t" //r0 = r0 | 0x03000000
"fmxr fpscr, %0 \n\t" //FPSCR = r0
: "=r"(r)
: "r"(x), "r"(y)
);
}
#else
void EnableFZ() {
// TODO
}
void FPU_SetFastMode() {}
#endif
// Could delete this file, but might find use again.

View File

@ -189,12 +189,3 @@ inline uint16_t ShrinkToHalf(float full) {
FP16 fp = float_to_half_fast3(fp32);
return fp.u;
}
// FPU control.
void EnableFZ();
// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode
// where they can schedule VFP instructions on the NEON unit (these implementations have
// very slow VFP units).
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
void FPU_SetFastMode();

View File

@ -1215,7 +1215,7 @@ void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer
}
if (x < src->drawnWidth && y < src->drawnHeight && w > 0 && h > 0) {
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "Blit_CopyFramebufferForColorTexture");
BlitFramebuffer(dst, x, y, src, x, y, w, h, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
}

View File

@ -316,7 +316,6 @@ static int mainInternal(QApplication &a) {
emugl->resize(pixel_xres, pixel_yres);
emugl->showFullScreen();
#endif
EnableFZ();
// Disable screensaver
#if defined(QT_HAS_SYSTEMINFO)
QScreenSaver ssObject(emugl);

View File

@ -807,7 +807,6 @@ int main(int argc, char *argv[]) {
} else {
joystick = nullptr;
}
EnableFZ();
int framecount = 0;
bool mouseDown = false;

View File

@ -344,6 +344,8 @@ void EmuScreen::bootGame(const Path &filename) {
loadingViewColor_->Divert(0xFFFFFFFF, 0.75f);
loadingViewVisible_->Divert(UI::V_VISIBLE, 0.75f);
screenManager()->getDrawContext()->ResetStats();
}
void EmuScreen::bootComplete() {

View File

@ -387,6 +387,7 @@
<ClInclude Include="..\..\Common\BitSet.h" />
<ClInclude Include="..\..\Common\Buffer.h" />
<ClInclude Include="..\..\Common\File\AndroidStorage.h" />
<ClInclude Include="..\..\Common\Math\Statistics.h" />
<ClInclude Include="..\..\Common\Net\NetBuffer.h" />
<ClInclude Include="..\..\Common\Data\Collections\ConstMap.h" />
<ClInclude Include="..\..\Common\Data\Collections\FixedSizeQueue.h" />
@ -519,6 +520,7 @@
<ClCompile Include="..\..\Common\ArmEmitter.cpp" />
<ClCompile Include="..\..\Common\Buffer.cpp" />
<ClCompile Include="..\..\Common\File\AndroidStorage.cpp" />
<ClCompile Include="..\..\Common\Math\Statistics.cpp" />
<ClCompile Include="..\..\Common\Net\NetBuffer.cpp" />
<ClCompile Include="..\..\Common\Data\Color\RGBAUtil.cpp" />
<ClCompile Include="..\..\Common\Data\Convert\SmallDataConvert.cpp" />
@ -639,4 +641,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -372,6 +372,9 @@
<ClCompile Include="..\..\Common\File\AndroidStorage.cpp">
<Filter>File</Filter>
</ClCompile>
<ClCompile Include="..\..\Common\Math\Statistics.cpp">
<Filter>Math</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="targetver.h" />
@ -688,11 +691,9 @@
<ClInclude Include="..\..\Common\File\AndroidStorage.h">
<Filter>File</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="..\..\ext\libpng17\CMakeLists.txt">
<Filter>ext\libpng17</Filter>
</Text>
<ClInclude Include="..\..\Common\Math\Statistics.h">
<Filter>Math</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\Common\Math\fast\fast_matrix_neon.S">
@ -701,5 +702,6 @@
<None Include="..\..\Common\Math\lin\matrix_neon.s">
<Filter>Math\lin</Filter>
</None>
<None Include="..\..\ext\libpng17\CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>

View File

@ -164,6 +164,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/Common/Input/InputState.cpp \
$(SRC)/Common/Math/fast/fast_matrix.c \
$(SRC)/Common/Math/math_util.cpp \
$(SRC)/Common/Math/Statistics.cpp \
$(SRC)/Common/Math/curves.cpp \
$(SRC)/Common/Math/expression_parser.cpp \
$(SRC)/Common/Math/lin/vec3.cpp.arm \

View File

@ -807,6 +807,3 @@ void bindDefaultFBO()
{
[sharedViewController bindDefaultFBO];
}
void EnableFZ(){};
void DisableFZ(){};

View File

@ -271,6 +271,7 @@ SOURCES_CXX += \
$(COMMONDIR)/Math/curves.cpp \
$(COMMONDIR)/Math/expression_parser.cpp \
$(COMMONDIR)/Math/math_util.cpp \
$(COMMONDIR)/Math/Statistics.cpp \
$(COMMONDIR)/Math/lin/vec3.cpp \
$(COMMONDIR)/Math/lin/matrix4x4.cpp \
$(COMMONDIR)/Net/HTTPClient.cpp \