Merge pull request #15229 from hrydgard/gpu-profiling-improvements

Add a new logging profiler to the Vulkan backend
This commit is contained in:
Henrik Rydgård 2021-12-12 13:41:51 +01:00 committed by GitHub
commit 293b5c0cf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 213 additions and 6 deletions

View File

@ -589,6 +589,8 @@ add_library(Common STATIC
Common/GPU/Vulkan/VulkanLoader.h
Common/GPU/Vulkan/VulkanMemory.cpp
Common/GPU/Vulkan/VulkanMemory.h
Common/GPU/Vulkan/VulkanProfiler.cpp
Common/GPU/Vulkan/VulkanProfiler.h
Common/GPU/Vulkan/thin3d_vulkan.cpp
Common/GPU/Vulkan/VulkanRenderManager.cpp
Common/GPU/Vulkan/VulkanRenderManager.h

View File

@ -443,6 +443,7 @@
<ClInclude Include="GPU\Vulkan\VulkanImage.h" />
<ClInclude Include="GPU\Vulkan\VulkanLoader.h" />
<ClInclude Include="GPU\Vulkan\VulkanMemory.h" />
<ClInclude Include="GPU\Vulkan\VulkanProfiler.h" />
<ClInclude Include="GPU\Vulkan\VulkanQueueRunner.h" />
<ClInclude Include="GPU\Vulkan\VulkanRenderManager.h" />
<ClInclude Include="Input\GestureDetector.h" />
@ -870,6 +871,7 @@
<ClCompile Include="GPU\Vulkan\VulkanImage.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanLoader.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanMemory.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanProfiler.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanQueueRunner.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanRenderManager.cpp" />
<ClCompile Include="Input\GestureDetector.cpp" />

View File

@ -412,6 +412,9 @@
<ClInclude Include="GPU\Vulkan\VulkanAlloc.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
<ClInclude Include="GPU\Vulkan\VulkanProfiler.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ABI.cpp" />
@ -786,6 +789,9 @@
<ClCompile Include="..\ext\vma\vk_mem_alloc.cpp">
<Filter>ext\vma</Filter>
</ClCompile>
<ClCompile Include="GPU\Vulkan\VulkanProfiler.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Crypto">

View File

@ -289,10 +289,14 @@ void VulkanContext::DestroyInstance() {
instance_ = VK_NULL_HANDLE;
}
void VulkanContext::BeginFrame() {
// Per-frame setup for the frame slot selected by curFrame_: flushes that slot's queued
// deletions, then (only when a command buffer is supplied) starts the slot's profiler frame.
void VulkanContext::BeginFrame(VkCommandBuffer firstCommandBuffer) {
	FrameData &current = frame_[curFrame_];

	// Process pending deletes.
	current.deleteList.PerformDeletes(device_, allocator_);

	// VK_NULL_HANDLE when profiler is disabled.
	if (!firstCommandBuffer) {
		return;
	}
	current.profiler.BeginFrame(this, firstCommandBuffer);
}
void VulkanContext::EndFrame() {
@ -675,6 +679,11 @@ VkResult VulkanContext::CreateDevice() {
allocatorInfo.device = device_;
allocatorInfo.instance = instance_;
vmaCreateAllocator(&allocatorInfo, &allocator_);
for (int i = 0; i < ARRAY_SIZE(frame_); i++) {
frame_[i].profiler.Init(this);
}
return res;
}
@ -1122,6 +1131,10 @@ void VulkanContext::DestroyDevice() {
INFO_LOG(G3D, "VulkanContext::DestroyDevice (performing deletes)");
PerformPendingDeletes();
for (int i = 0; i < ARRAY_SIZE(frame_); i++) {
frame_[i].profiler.Shutdown();
}
vmaDestroyAllocator(allocator_);
allocator_ = VK_NULL_HANDLE;

View File

@ -9,6 +9,21 @@
#include "Common/GPU/Vulkan/VulkanLoader.h"
#include "Common/GPU/Vulkan/VulkanDebug.h"
#include "Common/GPU/Vulkan/VulkanAlloc.h"
#include "Common/GPU/Vulkan/VulkanProfiler.h"
// Enable or disable a simple logging profiler for Vulkan.
// Mostly useful for profiling texture uploads currently, but could be useful for
// other things as well. We also have a nice integrated render pass profiler in the queue
// runner, but this one is more convenient for transient events.
//
// Usage:
//   VK_PROFILE_BEGIN(vulkan, cmd, stage, message);
//   ...
//   VK_PROFILE_END(vulkan, cmd, stage);
// Always terminate an invocation with a semicolon. When the profiler is disabled both macros
// expand to nothing, so their arguments are never evaluated - keep them free of side effects.

// #define VULKAN_PROFILER_ENABLED

#if defined(VULKAN_PROFILER_ENABLED)
// The expansions deliberately carry no trailing semicolon (the call site supplies it, which keeps
// unbraced if/else around an invocation well-formed), and the context argument is parenthesized
// so any pointer-valued expression can be passed.
#define VK_PROFILE_BEGIN(vulkan, cmd, stage, message) (vulkan)->GetProfiler()->Begin(cmd, stage, message)
#define VK_PROFILE_END(vulkan, cmd, stage) (vulkan)->GetProfiler()->End(cmd, stage)
#else
#define VK_PROFILE_BEGIN(vulkan, cmd, stage, message)
#define VK_PROFILE_END(vulkan, cmd, stage)
#endif
enum {
VULKAN_FLAG_VALIDATE = 1,
@ -62,6 +77,8 @@ struct VulkanPhysicalDeviceInfo {
bool canBlitToPreferredDepthStencilFormat;
};
class VulkanProfiler;
// This is a bit repetitive...
class VulkanDeleteList {
struct BufferWithAlloc {
@ -188,9 +205,13 @@ public:
int GetBackbufferWidth() { return (int)swapChainExtent_.width; }
int GetBackbufferHeight() { return (int)swapChainExtent_.height; }
void BeginFrame();
void BeginFrame(VkCommandBuffer firstCommandBuffer);
void EndFrame();
// Returns the profiler stored in the frame slot currently selected by curFrame_.
VulkanProfiler *GetProfiler() {
	FrameData &current = frame_[curFrame_];
	return &current.profiler;
}
// Simple workaround for the casting warning.
template <class T>
void SetDebugName(T handle, VkObjectType type, const char *name) {
@ -369,6 +390,7 @@ private:
// State kept separately for each in-flight frame slot (one instance per entry of frame_).
struct FrameData {
FrameData() {}
// Deletions queued for this slot.
VulkanDeleteList deleteList;
// Profiler instance owned by this slot.
VulkanProfiler profiler;
};
FrameData frame_[MAX_INFLIGHT_FRAMES];
int curFrame_ = 0;

View File

@ -1,6 +1,7 @@
#include <algorithm>
#include "Common/Log.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanAlloc.h"
#include "Common/GPU/Vulkan/VulkanImage.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"

View File

@ -1,7 +1,9 @@
#pragma once
#include "Common/GPU/Vulkan/VulkanContext.h"
#include <string>
#include "VulkanLoader.h"
class VulkanContext;
class VulkanDeviceAllocator;
VK_DEFINE_HANDLE(VmaAllocation);

View File

@ -0,0 +1,91 @@
#include "VulkanProfiler.h"
#include "VulkanContext.h"
using namespace PPSSPP_VK;
// Creates the timestamp query pool (MAX_QUERY_COUNT queries) on the context's device and puts
// the profiler back into its initial state.
//   vulkan: context providing the device; stored for later use by Shutdown().
void VulkanProfiler::Init(VulkanContext *vulkan) {
	vulkan_ = vulkan;

	// A newly created pool has never been reset, and nothing recorded earlier refers to it.
	// Start from a clean slate so the next BeginFrame resets the whole pool and doesn't try
	// to read back results for scopes recorded before this Init.
	scopes_.clear();
	scopeStack_.clear();
	numQueries_ = 0;
	firstFrame_ = true;

	VkQueryPoolCreateInfo ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
	ci.queryCount = MAX_QUERY_COUNT;
	ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
	VkResult res = vkCreateQueryPool(vulkan->GetDevice(), &ci, nullptr, &queryPool_);
	if (res != VK_SUCCESS) {
		// Don't leave a possibly-garbage handle around; make the failure visible in the log.
		WARN_LOG(G3D, "VulkanProfiler: vkCreateQueryPool failed (VkResult %d)", (int)res);
		queryPool_ = VK_NULL_HANDLE;
	}
}
void VulkanProfiler::Shutdown() {
vkDestroyQueryPool(vulkan_->GetDevice(), queryPool_, nullptr);
}
// Starts a new profiling frame for this profiler instance.
// First reads back and logs the timestamps recorded since the previous BeginFrame on this
// instance, then records a reset of the used queries into firstCommandBuf so they can be
// written again.
//   vulkan:          context used to query the device and its timestamp properties.
//   firstCommandBuf: command buffer that receives the query pool reset command.
void VulkanProfiler::BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuf) {
	vulkan_ = vulkan;

	// Check for old queries belonging to this frame context that we can log out - these are
	// expected to be done by now, but since we don't wait for them, verify the result before
	// trusting the data.
	if (numQueries_ > 0) {
		std::vector<uint64_t> results(numQueries_);
		VkResult res = vkGetQueryPoolResults(vulkan->GetDevice(), queryPool_, 0, numQueries_, sizeof(uint64_t) * numQueries_, results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
		if (res == VK_SUCCESS) {
			// timestampPeriod is multiplied by 1e-6 to turn timestamp ticks into milliseconds.
			double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);
			int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;
			// Only the low validBits bits of a timestamp are meaningful; masking the difference
			// keeps the delta correct across a counter wrap.
			uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);

			static const char * const indent[4] = { "", " ", " ", " " };
			const size_t unclosedId = (size_t)-1;

			// Log it all out.
			for (const auto &scope : scopes_) {
				if (scope.endQueryId == unclosedId) {
					NOTICE_LOG(G3D, "Unclosed scope: %s", scope.name.c_str());
					continue;
				}
				uint64_t startTime = results[scope.startQueryId];
				uint64_t endTime = results[scope.endQueryId];
				uint64_t delta = (endTime - startTime) & timestampDiffMask;
				double milliseconds = (double)delta * timestampConversionFactor;
				// Clamp instead of wrapping, so scopes nested deeper than the table keep the
				// deepest indent rather than jumping back to none.
				int depth = scope.level < 3 ? scope.level : 3;
				NOTICE_LOG(G3D, "%s%s (%0.3f ms)", indent[depth], scope.name.c_str(), milliseconds);
			}
		} else {
			// E.g. VK_NOT_READY: the contents of results are not usable, so don't log them.
			WARN_LOG(G3D, "VulkanProfiler: could not read back %d timestamp queries (VkResult %d)", numQueries_, (int)res);
		}
	}

	scopes_.clear();
	// Forget scopes left open by unbalanced Begin/End calls, so their indices into the
	// now-cleared scopes_ can't be used and nesting levels start from zero again.
	scopeStack_.clear();

	// Only need to reset all on the first frame.
	if (firstFrame_) {
		numQueries_ = MAX_QUERY_COUNT;
		firstFrame_ = false;
	}
	// Nothing was written since the last reset if the count is zero; skip the no-op command.
	if (numQueries_ > 0) {
		vkCmdResetQueryPool(firstCommandBuf, queryPool_, 0, numQueries_);
	}
	numQueries_ = 0;
}
// Marker pushed on scopeStack_ for a Begin() that could not be recorded. It is larger than any
// valid index into scopes_, which is how End() recognizes it.
static const size_t DROPPED_SCOPE = (size_t)-1;

// Opens a named profiling scope: records a timestamp into cmdBuf at the given pipeline stage
// and pushes the scope on the scope stack. Every Begin() must be paired with an End().
// If the query pool can't hold this scope's start plus an end timestamp for every scope that
// is open afterwards, the scope is dropped: nothing is recorded, and the matching End() is a no-op.
void VulkanProfiler::Begin(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stageFlags, std::string scopeName) {
	// One slot for this start + one end slot per stack entry, including the one we're adding.
	// (Dropped entries are counted too, which is merely conservative.)
	const size_t slotsNeeded = (size_t)numQueries_ + scopeStack_.size() + 2;
	if (slotsNeeded > (size_t)MAX_QUERY_COUNT) {
		// Keep the stack balanced so the matching End() pops this entry and not an outer scope.
		scopeStack_.push_back(DROPPED_SCOPE);
		return;
	}

	ProfilerScope scope;
	scope.name = scopeName;
	scope.startQueryId = numQueries_;
	scope.endQueryId = (size_t)-1;  // Marks the scope as not yet closed.
	scope.level = (int)scopeStack_.size();

	scopeStack_.push_back(scopes_.size());
	scopes_.push_back(scope);

	vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_);
	numQueries_++;
}

// Closes the innermost open scope by recording its end timestamp into cmdBuf at the given
// pipeline stage. Does nothing but pop the stack if the matching Begin() was dropped, and
// nothing at all if there is no open scope.
void VulkanProfiler::End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stageFlags) {
	if (scopeStack_.empty()) {
		// End() without a matching Begin().
		return;
	}

	size_t scopeId = scopeStack_.back();
	scopeStack_.pop_back();

	// The first test rejects DROPPED_SCOPE as well as stale indices that no longer refer to
	// an entry of scopes_. The second is a last line of defense against overrunning the pool.
	if (scopeId >= scopes_.size() || numQueries_ >= MAX_QUERY_COUNT) {
		return;
	}

	ProfilerScope &scope = scopes_[scopeId];
	scope.endQueryId = numQueries_;

	vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_);
	numQueries_++;
}

View File

@ -0,0 +1,47 @@
#pragma once
#include <vector>
#include <string>
#include "Common/Log.h"
#include "VulkanLoader.h"
// Simple scope-based profiler, initially meant for instant one-time tasks like texture uploads
// etc. Supports recursive scopes. Scopes are not yet tracked separately for each command buffer.
// For the pass profiler in VulkanQueueRunner, a purpose-built separate profiler that can take only
// one measurement between each pass makes more sense.
//
// Put the whole thing in a FrameData to allow for overlap.
// One recorded profiling scope: a label plus the indices of the two timestamp queries that
// bracket it. All fields have defined defaults, so a default-constructed scope reads as
// "unnamed, top level, not yet closed".
struct ProfilerScope {
	// Label printed when the scope is logged.
	std::string name;
	// Index of the query holding the start timestamp.
	size_t startQueryId = 0;
	// Index of the query holding the end timestamp; (size_t)-1 while the scope is still open.
	size_t endQueryId = static_cast<size_t>(-1);
	// Nesting depth at the time the scope was opened (0 = outermost).
	int level = 0;
};
class VulkanContext;
// Timestamp-query based scope profiler. Scopes are opened and closed with Begin()/End() while
// recording commands; BeginFrame() logs the timings of the previously recorded scopes.
class VulkanProfiler {
public:
	// Creates the query pool on the context's device. Call before any other method.
	void Init(VulkanContext *vulkan);
	// Destroys the query pool.
	void Shutdown();

	// Logs the results of the scopes recorded since the last BeginFrame on this instance and
	// records a query pool reset into firstCommandBuffer.
	void BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuffer);

	// Opens a scope named scopeName, timestamped at the given pipeline stage in cmdBuf.
	void Begin(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stage, std::string scopeName);
	// Closes the most recently opened scope, timestamped at the given pipeline stage in cmdBuf.
	void End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stage);

private:
	// Set by Init() and refreshed by BeginFrame(); null until then.
	VulkanContext *vulkan_ = nullptr;

	VkQueryPool queryPool_ = VK_NULL_HANDLE;
	// Scopes recorded since the last BeginFrame, in the order they were opened.
	std::vector<ProfilerScope> scopes_;
	// Number of queries written since the last reset; also the next free query index.
	int numQueries_ = 0;
	// True until the first BeginFrame, which has to reset the entire pool.
	bool firstFrame_ = true;

	// Indices into scopes_ of the currently open scopes, innermost last.
	std::vector<size_t> scopeStack_;

	// Size of the query pool. Shared by all instances rather than stored per object, which
	// also keeps the class copy-assignable.
	static constexpr int MAX_QUERY_COUNT = 1024;
};

View File

@ -583,7 +583,12 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) {
if (!run_) {
WARN_LOG(G3D, "BeginFrame while !run_!");
}
vulkan_->BeginFrame();
#if defined(VULKAN_PROFILER_ENABLED)
vulkan_->BeginFrame(GetInitCmd());
#else
vulkan_->BeginFrame(VK_NULL_HANDLE);
#endif
insideFrame_ = true;
renderStepOffset_ = 0;

View File

@ -731,8 +731,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
// Compute experiment
if (actualFmt == VULKAN_8888_FORMAT && scaleFactor > 1 && hardwareScaling) {
if (actualFmt == VULKAN_8888_FORMAT && scaleFactor > 1 && hardwareScaling && !IsVideo(entry->addr)) {
if (uploadCS_ != VK_NULL_HANDLE)
computeUpload = true;
}
@ -787,6 +786,9 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
}
if (entry->vkTex) {
VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
StringFromFormat("Texture Upload"));
// NOTE: Since the level is not part of the cache key, we assume it never changes.
u8 level = std::max(0, gstate.getTexLevelOffset16() / 16);
bool fakeMipmap = IsFakeMipmapChange() && level > 0;
@ -815,7 +817,10 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
double replaceStart = time_now_d();
replaced.Load(i, data, stride); // if it fails, it'll just be garbage data... OK for now.
replacementTimeThisFrame_ += time_now_d() - replaceStart;
VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT,
StringFromFormat("Copy Upload (replaced): %dx%d", mipWidth, mipHeight));
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT);
} else {
if (fakeMipmap) {
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
@ -835,15 +840,21 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
VkImageView view = entry->vkTex->CreateViewForMip(i);
VkDescriptorSet descSet = computeShaderManager_.GetDescriptorSet(view, texBuf, bufferOffset, srcSize);
struct Params { int x; int y; } params{ mipUnscaledWidth, mipUnscaledHeight };
VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
StringFromFormat("Compute Upload: %dx%d->%dx%d", mipUnscaledWidth, mipUnscaledHeight, mipWidth, mipHeight));
vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipeline(uploadCS_));
vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr);
vkCmdPushConstants(cmdInit, computeShaderManager_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);
vkCmdDispatch(cmdInit, (mipUnscaledWidth + 7) / 8, (mipUnscaledHeight + 7) / 8, 1);
VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
vulkan->Delete().QueueDeleteImageView(view);
} else {
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt);
VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT,
StringFromFormat("Copy Upload (replaced): %dx%d", mipWidth, mipHeight));
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT);
}
}
if (replacer_.Enabled()) {
@ -861,9 +872,11 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
// Generate any additional mipmap levels.
// This will transition the whole stack to GENERAL if it wasn't already.
if (maxLevel != maxLevelToGenerate) {
VK_PROFILE_BEGIN(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT, StringFromFormat("Mipgen up to level %d", maxLevelToGenerate));
entry->vkTex->GenerateMips(cmdInit, maxLevel + 1, computeUpload);
layout = VK_IMAGE_LAYOUT_GENERAL;
prevStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_TRANSFER_BIT);
}
if (maxLevel == 0) {
@ -875,6 +888,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
}
entry->vkTex->EndCreate(cmdInit, false, prevStage, layout);
VK_PROFILE_END(vulkan, cmdInit, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}
}

View File

@ -131,6 +131,7 @@ VULKAN_FILES := \
$(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanImage.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanMemory.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanProfiler.cpp \
$(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \
$(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \
$(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \

View File

@ -262,6 +262,7 @@ SOURCES_CXX += \
$(COMMONDIR)/GPU/Vulkan/VulkanDebug.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanImage.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanMemory.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanProfiler.cpp \
$(COMMONDIR)/Input/GestureDetector.cpp \
$(COMMONDIR)/Input/InputState.cpp \
$(COMMONDIR)/Math/curves.cpp \