Sketch the new VulkanPushPool

2024-11-23 13:30:02 +00:00 · 2023-03-14 22:44:37 +01:00 · 2023-03-14 22:44:37 +01:00 · 56d792f422
commit 56d792f422
parent c31b4be690
4 changed files with 225 additions and 23 deletions
--- a/Common/GPU/Vulkan/VulkanContext.h
+++ b/Common/GPU/Vulkan/VulkanContext.h
@ -330,6 +330,7 @@ public:
 	}

 	int GetInflightFrames() const {
+		// Number of frames in flight, out of MAX_INFLIGHT_FRAMES.
 		return inflightFrames_;
 	}
 	// Don't call while a frame is in progress.
--- a/Common/GPU/Vulkan/VulkanMemory.cpp
+++ b/Common/GPU/Vulkan/VulkanMemory.cpp
@ -25,6 +25,7 @@

 #include "Common/Log.h"
 #include "Common/TimeUtil.h"
+#include "Common/Math/math_util.h"
 #include "Common/GPU/Vulkan/VulkanMemory.h"

 using namespace PPSSPP_VK;
@ -32,7 +33,16 @@ using namespace PPSSPP_VK;
 // Global push buffer tracker for vulkan memory profiling.
 // Don't want to manually dig up all the active push buffers.
 static std::mutex g_pushBufferListMutex;
-static std::set<VulkanPushBuffer *> g_pushBuffers;
+static std::set<VulkanMemoryManager *> g_pushBuffers;
+
+// Returns a snapshot of every memory manager currently registered in g_pushBuffers.
+// The copy is made while holding g_pushBufferListMutex.
+std::vector<VulkanMemoryManager *> GetActiveVulkanMemoryManagers() {
+	std::lock_guard<std::mutex> lock(g_pushBufferListMutex);
+	std::vector<VulkanMemoryManager *> managers;
+	managers.reserve(g_pushBuffers.size());
+	managers.insert(managers.end(), g_pushBuffers.begin(), g_pushBuffers.end());
+	return managers;
+}

 VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage, PushBufferType type)
 		: vulkan_(vulkan), name_(name), size_(size), usage_(usage), type_(type) {
@ -55,15 +65,6 @@ VulkanPushBuffer::~VulkanPushBuffer() {
 	_assert_(buffers_.empty());
 }

-std::vector<VulkanPushBuffer *> VulkanPushBuffer::GetAllActive() {
-	std::vector<VulkanPushBuffer *> buffers;
-	std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
-	for (auto iter : g_pushBuffers) {
-		buffers.push_back(iter);
-	}
-	return buffers;
-}
-
 bool VulkanPushBuffer::AddBuffer() {
 	BufInfo info;
 	VkDevice device = vulkan_->GetDevice();
@ -264,3 +265,118 @@ VkResult VulkanDescSetPool::Recreate(bool grow) {
 	}
 	return result;
 }
+
+// Registers the pool in the global g_pushBuffers list (used for the debug listing) and creates
+// one "original" block of originalBlockSize bytes for each of the MAX_INFLIGHT_FRAMES frame indices.
+//   vulkan            - context used for allocation and for querying the current frame.
+//   name              - label returned by Name(); the pointer is stored, not copied.
+//   originalBlockSize - size in bytes of each original block.
+//   usage             - VkBufferUsageFlags applied to every block's buffer.
+VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
+	// Initializers are listed in member declaration order, which is the order they actually run in.
+	: vulkan_(vulkan), originalBlockSize_(originalBlockSize), usage_(usage), name_(name) {
+	{
+		std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
+		g_pushBuffers.insert(this);
+	}
+
+	blocks_.reserve(VulkanContext::MAX_INFLIGHT_FRAMES);
+	for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
+		Block block = CreateBlock(originalBlockSize);
+		// Original blocks stay tied to their frame index; BeginFrame never returns them to the common pool.
+		block.original = true;
+		block.frameIndex = i;
+		blocks_.push_back(block);
+	}
+}
+
+// Removes the pool from the global g_pushBuffers list.
+// Does not free any blocks itself; it only debug-asserts that none are left.
+VulkanPushPool::~VulkanPushPool() {
+	{
+		std::lock_guard<std::mutex> lock(g_pushBufferListMutex);
+		g_pushBuffers.erase(this);
+	}
+
+	// blocks_ is emptied by Destroy().
+	_dbg_assert_(blocks_.empty());
+}
+
+// Calls Block::Destroy on every block, then empties the block list.
+void VulkanPushPool::Destroy() {
+	for (size_t i = 0; i < blocks_.size(); i++) {
+		blocks_[i].Destroy(vulkan_);
+	}
+	blocks_.clear();
+}
+
+// Creates one block: a VkBuffer of 'size' bytes with this pool's usage flags, allocated through VMA
+// with VMA_MEMORY_USAGE_CPU_TO_GPU and mapped, so that block.writePtr points at the mapped memory.
+// The returned block has used == 0, original == false and frameIndex == -1.
+VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
+	Block block{};
+	block.size = size;
+	block.frameIndex = -1;
+
+	VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+	b.size = size;
+	b.usage = usage_;
+	b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+	VmaAllocationCreateInfo allocCreateInfo{};
+	allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
+
+	// The optional allocation-info output is not needed here, so nullptr is passed for it.
+	VkResult result = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &block.buffer, &block.allocation, nullptr);
+	// NOTE(review): _assert_ is used instead of _dbg_assert_ on the assumption that the _dbg_ variant
+	// is compiled out of release builds - confirm. A failure here must not be skipped, because the
+	// map call below would then operate on an invalid allocation.
+	_assert_(result == VK_SUCCESS);
+
+	result = vmaMapMemory(vulkan_->Allocator(), block.allocation, (void **)(&block.writePtr));
+	_assert_(result == VK_SUCCESS);
+
+	return block;
+}
+
+// Intentionally does nothing: the buffer and allocation are released through Block::Destroy.
+VulkanPushPool::Block::~Block() = default;
+
+// Unmaps the block's allocation, then passes the buffer and allocation to the context's
+// deletion list via QueueDeleteBufferAllocation.
+void VulkanPushPool::Block::Destroy(VulkanContext *vulkan) {
+	auto allocator = vulkan->Allocator();
+	vmaUnmapMemory(allocator, allocation);
+	vulkan->Delete().QueueDeleteBufferAllocation(buffer, allocation);
+}
+
+// Prepares the pool for the frame index reported by vulkan_->GetCurFrame():
+//  - every block tied to that frame index gets its 'used' counter reset to 0,
+//  - non-original blocks among them are returned to the common pool (frameIndex = -1),
+//  - curBlockIndex_ is set to the first such block, or stays -1 if there is none.
+void VulkanPushPool::BeginFrame() {
+	const int curFrameIndex = vulkan_->GetCurFrame();
+	curBlockIndex_ = -1;
+	for (size_t i = 0; i < blocks_.size(); i++) {
+		Block &block = blocks_[i];
+		if (block.frameIndex != curFrameIndex) {
+			continue;
+		}
+		if (curBlockIndex_ == -1) {
+			// Pick a block associated with the current frame to start at.
+			// Store its position in blocks_ rather than its frame index: the two only coincide
+			// because the constructor places the original block for frame i at index i.
+			curBlockIndex_ = (int)i;
+		}
+		block.used = 0;
+		if (!block.original) {
+			// Return block to the common pool
+			block.frameIndex = -1;
+		}
+	}
+	// TODO: Also garbage collect blocks that have been unused for many frames here.
+}
+
+// Moves curBlockIndex_ forward to a block that can hold allocationSize bytes for the current frame
+// and marks those bytes as used at the start of that block.
+// Scans forward from the current block for one that is either already tied to this frame or free
+// in the common pool; if none fits, a new block is created at the end of blocks_.
+void VulkanPushPool::NextBlock(VkDeviceSize allocationSize) {
+	const int curFrameIndex = vulkan_->GetCurFrame();
+	curBlockIndex_++;
+	while (curBlockIndex_ < (int)blocks_.size()) {
+		Block &block = blocks_[curBlockIndex_];
+		// Accept a block already tied to this frame, or a free one from the common pool
+		// (frameIndex == -1, see BeginFrame) - but only if the allocation actually fits in it.
+		const bool available = block.frameIndex == curFrameIndex || block.frameIndex == -1;
+		if (available && block.size >= allocationSize) {
+			_assert_(block.used == 0);
+			block.used = allocationSize;
+			// Claim the block for this frame so BeginFrame resets it when this frame index comes up again.
+			block.frameIndex = curFrameIndex;
+			return;
+		}
+		curBlockIndex_++;
+	}
+
+	// We're still here and ran off the end of blocks. Create a new one.
+	// NOTE(review): allocationSize is narrowed to 32 bits before rounding up.
+	VkDeviceSize newBlockSize = std::max(originalBlockSize_, (VkDeviceSize)RoundUpToPowerOf2((uint32_t)allocationSize));
+	blocks_.push_back(CreateBlock(newBlockSize));
+	blocks_.back().frameIndex = curFrameIndex;
+	blocks_.back().used = allocationSize;
+	// curBlockIndex_ is already set correctly here.
+}
+
+// Sums the 'used' byte counts of all blocks, whichever frame they belong to.
+size_t VulkanPushPool::GetTotalSize() const {
+	size_t usedBytes = 0;
+	for (size_t i = 0; i < blocks_.size(); i++) {
+		usedBytes += blocks_[i].used;
+	}
+	return usedBytes;
+}
+
+// Sums the sizes of all blocks, i.e. the bytes currently held by the pool.
+size_t VulkanPushPool::GetTotalCapacity() const {
+	size_t capacityBytes = 0;
+	for (size_t i = 0; i < blocks_.size(); i++) {
+		capacityBytes += blocks_[i].size;
+	}
+	return capacityBytes;
+}
--- a/Common/GPU/Vulkan/VulkanMemory.h
+++ b/Common/GPU/Vulkan/VulkanMemory.h
@ -19,6 +19,16 @@ enum class PushBufferType {
 	GPU_ONLY,
 };

+// Just an abstract thing to get debug information.
+// Abstract interface exposing the figures shown in the debug listing:
+// a display name, the bytes in use and the total capacity.
+class VulkanMemoryManager {
+public:
+	virtual ~VulkanMemoryManager() = default;
+
+	// Bytes currently in use.
+	virtual size_t GetTotalSize() const = 0;
+	// Bytes currently reserved.
+	virtual size_t GetTotalCapacity() const = 0;
+	// Label identifying this manager.
+	virtual const char *Name() const = 0;
+};
+
 // VulkanPushBuffer
 // Simple incrementing allocator.
 // Use these to push vertex, index and uniform data. Generally you'll have two of these
@ -26,7 +36,7 @@ enum class PushBufferType {
 // has completed.
 //
 // TODO: Make it possible to suballocate pushbuffers from a large DeviceMemory block.
-class VulkanPushBuffer {
+class VulkanPushBuffer : public VulkanMemoryManager {
 	struct BufInfo {
 		VkBuffer buffer;
 		VmaAllocation allocation;
@ -102,7 +112,7 @@ public:
 		return offset_;
 	}

-	const char *Name() const {
+	const char *Name() const override {
 		return name_;
 	}

@ -131,10 +141,8 @@ public:
 		info->range = sizeof(T);
 	}

-	size_t GetTotalSize() const;  // Used size
-	size_t GetTotalCapacity() const;
-
-	static std::vector<VulkanPushBuffer *> GetAllActive();
+	size_t GetTotalSize() const override;  // Used size
+	size_t GetTotalCapacity() const override;

 private:
 	bool AddBuffer();
@ -153,6 +161,80 @@ private:
 	const char *name_;
 };

+// Simple memory pushbuffer pool that can share blocks between the "frames", to reduce the impact of push memory spikes -
+// a later frame can gobble up redundant buffers from an earlier frame even if they don't share frame index.
+class VulkanPushPool : public VulkanMemoryManager {
+public:
+	VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage);
+	~VulkanPushPool();
+
+	// Destroys every block and empties the pool. The destructor debug-asserts that no blocks remain.
+	void Destroy();
+	// Resets the blocks tied to the current frame index and selects the block to allocate from.
+	void BeginFrame();
+
+	size_t GetTotalSize() const override;  // Used size
+	size_t GetTotalCapacity() const override;
+
+	// Reserves numBytes in the current block, or in the next usable block if it does not fit.
+	// Returns the CPU write pointer; *vkbuf receives the buffer to bind and *bindOffset the byte
+	// offset of the reservation inside that buffer.
+	// The rounding uses a bit mask, so alignment has to be a power of two.
+	uint8_t *Allocate(VkDeviceSize numBytes, VkDeviceSize alignment, VkBuffer *vkbuf, uint32_t *bindOffset) {
+		_dbg_assert_(curBlockIndex_ >= 0);
+
+		Block &cur = blocks_[curBlockIndex_];
+		const VkDeviceSize alignMask = alignment - 1;
+		const VkDeviceSize start = (cur.used + alignMask) & ~alignMask;
+
+		if (start + numBytes > cur.size) {
+			// Doesn't fit in the current block. NextBlock may grow blocks_, so 'cur' must not be
+			// touched afterwards; look the block up again instead.
+			NextBlock(numBytes);
+			Block &fresh = blocks_[curBlockIndex_];
+			*vkbuf = fresh.buffer;
+			*bindOffset = 0;  // NextBlock places the allocation at the start of the block.
+			return fresh.writePtr;
+		}
+
+		cur.used = start + numBytes;
+		*vkbuf = cur.buffer;
+		*bindOffset = (uint32_t)start;
+		return cur.writePtr + start;
+	}
+
+	// Allocates numBytes with the given alignment, copies 'data' into the allocation and returns
+	// the bind offset within *vkbuf.
+	VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
+		uint32_t offset;
+		uint8_t *dest = Allocate(numBytes, alignment, vkbuf, &offset);
+		memcpy(dest, data, numBytes);
+		return offset;
+	}
+
+	const char *Name() const override {
+		return name_;
+	}
+
+private:
+	// Advances curBlockIndex_ to a block for the current frame, creating one if needed,
+	// and marks allocationSize bytes as used in it.
+	void NextBlock(VkDeviceSize allocationSize);
+
+	struct Block {
+		~Block();
+		VkBuffer buffer;
+		VmaAllocation allocation;
+
+		VkDeviceSize size;  // total bytes in the buffer
+		VkDeviceSize used;  // bytes handed out so far
+
+		int frameIndex;  // frame the block is tied to, or -1 when it is not tied to any frame
+		bool original;  // these blocks aren't garbage collected.
+
+		uint8_t *writePtr;  // mapped CPU pointer to the start of the buffer
+
+		void Destroy(VulkanContext *vulkan);
+	};
+
+	Block CreateBlock(size_t sz);
+
+	VulkanContext *vulkan_;
+	VkDeviceSize originalBlockSize_;
+	std::vector<Block> blocks_;
+	VkBufferUsageFlags usage_;
+	int curBlockIndex_ = -1;  // index into blocks_, -1 until BeginFrame has picked a block
+	const char *name_;
+};
+
 // Only appropriate for use in a per-frame pool.
 class VulkanDescSetPool {
 public:
@ -182,3 +264,6 @@ private:
 	uint32_t usage_ = 0;
 	bool grow_;
 };
+
+std::vector<VulkanMemoryManager *> GetActiveVulkanMemoryManagers();
+
--- a/GPU/Vulkan/DebugVisVulkan.cpp
+++ b/GPU/Vulkan/DebugVisVulkan.cpp
@ -39,7 +39,7 @@

 #undef DrawText

-bool comparePushBufferNames(const VulkanPushBuffer *a, const VulkanPushBuffer *b) {
+// Sort predicate: orders memory managers by Name(), compared with strcmp.
+bool comparePushBufferNames(const VulkanMemoryManager *a, const VulkanMemoryManager *b) {
 	return strcmp(a->Name(), b->Name()) < 0;
 }

@ -75,13 +75,13 @@ void DrawAllocatorVis(UIContext *ui, GPUInterface *gpu) {
 	str << "Push buffers:" << std::endl;

 	// Now list the various push buffers.
-	auto pushBuffers = VulkanPushBuffer::GetAllActive();
-	std::sort(pushBuffers.begin(), pushBuffers.end(), comparePushBufferNames);
+	auto managers = GetActiveVulkanMemoryManagers();
+	std::sort(managers.begin(), managers.end(), comparePushBufferNames);

-	for (auto push : pushBuffers) {
-		str << "  " << push->Name() << " "
-			<< NiceSizeFormat(push->GetTotalCapacity()) << ", used: "
-			<< NiceSizeFormat(push->GetTotalSize()) << std::endl;
+	for (auto manager : managers) {
+		str << "  " << manager->Name() << " "
+			<< NiceSizeFormat(manager->GetTotalCapacity()) << ", used: "
+			<< NiceSizeFormat(manager->GetTotalSize()) << std::endl;
 	}

 	const int padding = 10 + System_GetPropertyFloat(SYSPROP_DISPLAY_SAFE_INSET_LEFT);