Vulkan GPU profiler: Measure the CPU time spent on the render thread recording command buffers.

This commit is contained in:
Henrik Rydgård 2019-09-18 00:16:08 +02:00
parent e723594133
commit 2467fddc01
4 changed files with 42 additions and 28 deletions

View File

@ -1,4 +1,6 @@
#include <map>
#include "base/timeutil.h"
#include "DataFormat.h"
#include "VulkanQueueRunner.h"
#include "VulkanRenderManager.h"
@ -375,7 +377,9 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}
void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, VkQueryPool queryPool, std::vector<std::string> *timestampDescriptions) {
void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, QueueProfileContext *profile) {
if (profile)
profile->cpuStartTime = real_time_now();
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
// * Create copies of render target that are rendered to multiple times and textured from in sequence, and push those render passes
@ -465,9 +469,9 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &st
break;
}
if (queryPool) {
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool, (uint32_t)timestampDescriptions->size());
timestampDescriptions->push_back(StepToString(step));
if (profile) {
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile->queryPool, (uint32_t)profile->timestampDescriptions.size());
profile->timestampDescriptions.push_back(StepToString(step));
}
}
@ -476,6 +480,9 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &st
for (size_t i = 0; i < steps.size(); i++) {
delete steps[i];
}
if (profile)
profile->cpuEndTime = real_time_now();
}
void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {

View File

@ -173,6 +173,14 @@ struct TransitionRequest {
VkImageLayout targetLayout;
};
struct QueueProfileContext {
VkQueryPool queryPool;
std::vector<std::string> timestampDescriptions;
std::string profileSummary;
double cpuStartTime;
double cpuEndTime;
};
struct VKRStep {
VKRStep(VKRStepType _type) : stepType(_type) {}
~VKRStep() {}
@ -232,7 +240,7 @@ public:
}
// RunSteps can modify steps but will leave it in a valid state.
void RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, VkQueryPool queryPool, std::vector<std::string> *timestampDescriptions);
void RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &steps, QueueProfileContext *profile);
void LogSteps(const std::vector<VKRStep *> &steps);
std::string StepToString(const VKRStep &step) const;

View File

@ -137,7 +137,7 @@ VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan
VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
query_ci.queryCount = MAX_TIMESTAMP_QUERIES;
query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].timestampQueryPool_);
res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool);
}
queueRunner_.CreateDeviceObjects();
@ -227,7 +227,7 @@ void VulkanRenderManager::StopThread() {
frameData.pull_condVar.notify_all();
}
// Zero the queries so we don't try to pull them later.
frameData.timestampDescriptions.clear();
frameData.profile.timestampDescriptions.clear();
}
thread_.join();
ILOG("Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
@ -298,7 +298,7 @@ VulkanRenderManager::~VulkanRenderManager() {
vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr);
vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr);
vkDestroyFence(device, frameData_[i].fence, nullptr);
vkDestroyQueryPool(device, frameData_[i].timestampQueryPool_, nullptr);
vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr);
}
queueRunner_.DestroyDeviceObjects();
}
@ -375,11 +375,11 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) {
if (frameData.profilingEnabled_) {
// Pull the profiling results from last time and produce a summary!
if (!frameData.timestampDescriptions.empty()) {
int numQueries = (int)frameData.timestampDescriptions.size();
if (!frameData.profile.timestampDescriptions.empty()) {
int numQueries = (int)frameData.profile.timestampDescriptions.size();
VkResult res = vkGetQueryPoolResults(
vulkan_->GetDevice(),
frameData.timestampQueryPool_, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),
frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT);
if (res == VK_SUCCESS) {
double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);
@ -390,19 +390,20 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) {
char line[256];
snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));
str << line;
snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);
str << line;
for (int i = 0; i < numQueries - 1; i++) {
uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;
double milliseconds = (double)diff * timestampConversionFactor;
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.timestampDescriptions[i + 1].c_str(), milliseconds);
snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);
str << line;
}
frameData.profileSummary = str.str();
frameData.profile.profileSummary = str.str();
} else {
frameData.profileSummary = "(error getting GPU profile - not ready?)";
frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";
}
}
else {
frameData.profileSummary = "(no GPU profile data collected)";
} else {
frameData.profile.profileSummary = "(no GPU profile data collected)";
}
}
@ -415,16 +416,16 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) {
insideFrame_ = true;
frameData.timestampDescriptions.clear();
frameData.profile.timestampDescriptions.clear();
if (frameData_->profilingEnabled_) {
// For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,
// unless we want to limit ourselves to only measure the main cmd buffer.
// Reserve the first two queries for initCmd.
frameData.timestampDescriptions.push_back("initCmd Begin");
frameData.timestampDescriptions.push_back("initCmd");
frameData.profile.timestampDescriptions.push_back("initCmd Begin");
frameData.profile.timestampDescriptions.push_back("initCmd");
VkCommandBuffer initCmd = GetInitCmd();
vkCmdResetQueryPool(initCmd, frameData.timestampQueryPool_, 0, MAX_TIMESTAMP_QUERIES);
vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, frameData.timestampQueryPool_, 0);
vkCmdResetQueryPool(initCmd, frameData.profile.queryPool, 0, MAX_TIMESTAMP_QUERIES);
vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, frameData.profile.queryPool, 0);
}
}
@ -960,7 +961,7 @@ void VulkanRenderManager::Submit(int frame, bool triggerFence) {
if (frameData.hasInitCommands) {
if (frameData.profilingEnabled_ && triggerFence) {
// Pre-allocated query ID 1.
vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.timestampQueryPool_, 1);
vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1);
}
VkResult res = vkEndCommandBuffer(frameData.initCmd);
_assert_msg_(G3D, res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res));
@ -1061,7 +1062,7 @@ void VulkanRenderManager::Run(int frame) {
auto &stepsOnThread = frameData_[frame].steps;
VkCommandBuffer cmd = frameData.mainCmd;
// queueRunner_.LogSteps(stepsOnThread);
queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? frameData.timestampQueryPool_ : VK_NULL_HANDLE, &frameData.timestampDescriptions);
queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr);
stepsOnThread.clear();
switch (frameData.type) {

View File

@ -258,7 +258,7 @@ public:
}
std::string GetGpuProfileString() const {
return frameData_[vulkan_->GetCurFrame()].profileSummary;
return frameData_[vulkan_->GetCurFrame()].profile.profileSummary;
}
private:
@ -305,10 +305,8 @@ private:
uint32_t curSwapchainImage = -1;
// Profiling.
VkQueryPool timestampQueryPool_ = VK_NULL_HANDLE;
QueueProfileContext profile;
bool profilingEnabled_;
std::vector<std::string> timestampDescriptions;
std::string profileSummary;
};
FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];