Merge pull request #16802 from hrydgard/parallel-pipeline-creation

Vulkan: Parallel pipeline creation
2024-11-26 23:10:38 +00:00 · 2023-02-01 12:23:05 +01:00 · 2023-02-01 12:23:05 +01:00 · 2ed88a83cb
commit 2ed88a83cb
parent 0cfce04a04 a67604d003
4 changed files with 92 additions and 32 deletions
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -1336,7 +1336,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
 					// Maybe a middle pass. But let's try to just block and compile here for now, this doesn't
 					// happen all that much.
 					graphicsPipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
-					graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount);
+					graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount, time_now_d(), -1);
 				}

 				VkPipeline pipeline = graphicsPipeline->pipeline[(size_t)rpType]->BlockUntilReady();
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -1,6 +1,7 @@
 #include <algorithm>
 #include <cstdint>

+#include <map>
 #include <sstream>

 #include "Common/Log.h"
@@ -27,7 +28,7 @@
 using namespace PPSSPP_VK;

 // renderPass is an example of the "compatibility class" or RenderPassType type.
-bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount) {
+bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {
 	bool multisample = RenderPassTypeHasMultisample(rpType);
 	if (multisample) {
 		if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
@@ -118,12 +119,17 @@ bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleR
 	double start = time_now_d();
 	VkPipeline vkpipeline;
 	VkResult result = vkCreateGraphicsPipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &pipe, nullptr, &vkpipeline);
-	double taken_ms = (time_now_d() - start) * 1000.0;
+
+	double now = time_now_d();
+	double taken_ms_since_scheduling = (now - scheduleTime) * 1000.0;
+	double taken_ms = (now - start) * 1000.0;

 	if (taken_ms < 0.1) {
-		DEBUG_LOG(G3D, "Pipeline creation time: %0.2f ms (fast) rpType: %08x sampleBits: %d (%s)", taken_ms, (u32)rpType, (u32)sampleCount, tag_.c_str());
+		DEBUG_LOG(G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling (fast) rpType: %04x sampleBits: %d (%s)",
+			countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());
 	} else {
-		INFO_LOG(G3D, "Pipeline creation time: %0.2f ms  rpType: %08x sampleBits: %d (%s)", taken_ms, (u32)rpType, (u32)sampleCount, tag_.c_str());
+		INFO_LOG(G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling  rpType: %04x sampleBits: %d (%s)",
+			countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());
 	}

 	bool success = true;
@@ -218,26 +224,27 @@ void VKRGraphicsPipeline::LogCreationFailure() const {
 	ERROR_LOG(G3D, "======== END OF PIPELINE ==========");
 }

-bool VKRComputePipeline::Create(VulkanContext *vulkan) {
+// Spawns a CPU_COMPUTE task via pipeline->SpawnEmpty() that creates the Vulkan compute pipeline and returns its handle (VK_NULL_HANDLE on failure). Returns false if desc is already null, true once the task is spawned.
+bool VKRComputePipeline::CreateAsync(VulkanContext *vulkan) {
 	if (!desc) {
 		// Already failed to create this one.
 		return false;
 	}
-	VkPipeline vkpipeline;
-	VkResult result = vkCreateComputePipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &desc->pipe, nullptr, &vkpipeline);
+	// The member is cleared right after spawning, so the task gets its own copy of the pointer instead of reading this->desc later.
+	VKRComputePipelineDesc *taskDesc = desc;
+	pipeline->SpawnEmpty(&g_threadManager, [vulkan, taskDesc] {
+		VkPipeline vkpipeline = VK_NULL_HANDLE;
+		VkResult result = vkCreateComputePipelines(vulkan->GetDevice(), taskDesc->pipelineCache, 1, &taskDesc->pipe, nullptr, &vkpipeline);

-	bool success = true;
-	if (result != VK_SUCCESS) {
-		pipeline->Post(VK_NULL_HANDLE);
-		ERROR_LOG(G3D, "Failed creating compute pipeline! result='%s'", VulkanResultToString(result));
-		success = false;
-	} else {
-		pipeline->Post(vkpipeline);
-	}
-
-	delete desc;
+		delete taskDesc;  // The task owns the descriptor; free it on both the success and the failure path.
+		if (result != VK_SUCCESS) {
+			ERROR_LOG(G3D, "Failed creating compute pipeline! result='%s'", VulkanResultToString(result));
+			return (VkPipeline)VK_NULL_HANDLE;
+		}
+		return vkpipeline;
+	}, TaskType::CPU_COMPUTE);
 	desc = nullptr;
-	return success;
+	return true;
 }

 VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
@@ -370,7 +377,6 @@ VulkanRenderManager::~VulkanRenderManager() {

 	vulkan_->WaitUntilQueueIdle();

-	DrainCompileQueue();
 	VkDevice device = vulkan_->GetDevice();
 	frameDataShared_.Destroy(vulkan_);
 	for (int i = 0; i < inflightFramesAtStart_; i++) {
@@ -379,12 +385,43 @@ VulkanRenderManager::~VulkanRenderManager() {
 	queueRunner_.DestroyDeviceObjects();
 }

+struct SinglePipelineTask {  // Arguments for one VKRGraphicsPipeline::Create() call, queued for execution on a worker task.
+	VKRGraphicsPipeline *pipeline;  // Pipeline object whose Create() is invoked.
+	VkRenderPass compatibleRenderPass;  // Forwarded to Create() unchanged.
+	RenderPassType rpType;  // Forwarded to Create() unchanged.
+	VkSampleCountFlagBits sampleCount;  // Forwarded to Create() unchanged.
+	double scheduleTime;  // time_now_d() sampled when the batch was scheduled; Create() uses it only for its timing log.
+	int countToCompile;  // Number of entries in the batch this task came from; Create() uses it only for its log line.
+};
+
+class CreateMultiPipelinesTask : public Task {  // Task that creates a list of graphics pipelines, one after another.
+public:
+	CreateMultiPipelinesTask(VulkanContext *vulkan, std::vector<SinglePipelineTask> tasks) : vulkan_(vulkan), tasks_(tasks) {}  // Keeps its own copy of the task list.
+	~CreateMultiPipelinesTask() {}
+
+	TaskType Type() const override {
+		return TaskType::CPU_COMPUTE;
+	}
+
+	void Run() override {
+		for (auto &task : tasks_) {  // Sequential on purpose: all entries of one list are created by the same Run() call.
+			task.pipeline->Create(vulkan_, task.compatibleRenderPass, task.rpType, task.sampleCount, task.scheduleTime, task.countToCompile);
+		}
+	}
+
+	VulkanContext *vulkan_;  // Passed to every Create() call.
+	std::vector<SinglePipelineTask> tasks_;  // Pipelines to create, in order.
+};
+
 void VulkanRenderManager::CompileThreadFunc() {
 	SetCurrentThreadName("ShaderCompile");
 	while (true) {
 		std::vector<CompileQueueEntry> toCompile;
 		{
 			std::unique_lock<std::mutex> lock(compileMutex_);
+			// TODO: This should probably be a `while` loop, since a condition variable wait may wake up spuriously.
+			// It may also be beneficial to unlock and wait a little bit to see if we get some more shaders,
+			// so we can do a better job of thread-sorting them.
 			if (compileQueue_.empty() && run_) {
 				compileCond_.wait(lock);
 			}
@@ -395,24 +432,46 @@ void VulkanRenderManager::CompileThreadFunc() {
 			break;
 		}

-		double time = time_now_d();
-		// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
-		// and split up further.
-		// Those with the same pairs of shaders should be on the same thread.
+		int countToCompile = (int)toCompile.size();
+
+		// Pending graphics pipelines, keyed by their (vertex shader, fragment shader) pair.
+		std::map<std::pair<Promise<VkShaderModule> *, Promise<VkShaderModule> *>, std::vector<SinglePipelineTask>> map;
+
+		double scheduleTime = time_now_d();
+
+		// Group the pending graphics pipelines by shader pair; each group is then handed to one task.
+		// Those with the same pairs of shaders should be on the same thread, at least on NVIDIA.
+		// I don't think PowerVR cares though, it doesn't seem to reuse information between the compiles,
+		// so we might want a different splitting algorithm there.
 		for (auto &entry : toCompile) {
 			switch (entry.type) {
 			case CompileQueueEntry::Type::GRAPHICS:
-				entry.graphics->Create(vulkan_, entry.compatibleRenderPass, entry.renderPassType, entry.sampleCount);
+				map[std::pair< Promise<VkShaderModule> *, Promise<VkShaderModule> *>(entry.graphics->desc->vertexShader, entry.graphics->desc->fragmentShader)].push_back(
+					SinglePipelineTask{
+						entry.graphics,
+						entry.compatibleRenderPass,
+						entry.renderPassType,
+						entry.sampleCount,
+						scheduleTime,    // these two are for logging purposes.
+						countToCompile,
+					}
+				);
 				break;
 			case CompileQueueEntry::Type::COMPUTE:
-				entry.compute->Create(vulkan_);
+				// Queue up pending compute pipelines on separate tasks.
+				entry.compute->CreateAsync(vulkan_);
 				break;
 			}
 		}

-		double delta = time_now_d() - time;
-		if (delta > 0.005f) {
-			INFO_LOG(G3D, "CompileThreadFunc: Creating %d pipelines took %0.3f ms", (int)toCompile.size(), delta * 1000.0f);
+		for (auto iter : map) {
+			auto &shaders = iter.first;
+			auto &entries = iter.second;
+
+			// NOTICE_LOG(G3D, "For this shader pair, we have %d pipelines to create", (int)entries.size());
+
+			Task *task = new CreateMultiPipelinesTask(vulkan_, entries);
+			g_threadManager.EnqueueTask(task);
 		}

 		queueRunner_.NotifyCompileDone();
--- a/Common/GPU/Vulkan/VulkanRenderManager.h
+++ b/Common/GPU/Vulkan/VulkanRenderManager.h
@@ -120,7 +120,7 @@ struct VKRGraphicsPipeline {
 	VKRGraphicsPipeline(PipelineFlags flags, const char *tag) : flags_(flags), tag_(tag) {}
 	~VKRGraphicsPipeline();

-	bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount);
+	bool Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile);

 	void DestroyVariants(VulkanContext *vulkan, bool msaaOnly);

@@ -137,6 +137,7 @@ struct VKRGraphicsPipeline {
 	VkSampleCountFlagBits SampleCount() const { return sampleCount_; }

 	const char *Tag() const { return tag_.c_str(); }
+
 private:
 	void DestroyVariantsInstant(VkDevice device);

@@ -153,7 +154,7 @@ struct VKRComputePipeline {
 	VKRComputePipelineDesc *desc = nullptr;
 	Promise<VkPipeline> *pipeline = nullptr;

-	bool Create(VulkanContext *vulkan);
+	bool CreateAsync(VulkanContext *vulkan);
 	bool Pending() const {
 		return pipeline == VK_NULL_HANDLE && desc != nullptr;
 	}
--- a/GPU/Vulkan/ShaderManagerVulkan.cpp
+++ b/GPU/Vulkan/ShaderManagerVulkan.cpp
@@ -106,7 +106,7 @@ static Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan,
 	if (singleThreaded) {
 		return Promise<VkShaderModule>::AlreadyDone(compile());
 	} else {
-		return Promise<VkShaderModule>::Spawn(&g_threadManager, compile, TaskType::CPU_COMPUTE);
+		return Promise<VkShaderModule>::Spawn(&g_threadManager, compile, TaskType::DEDICATED_THREAD);
 	}
 }