mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-20 06:00:58 +00:00
Merge pull request #10393 from hrydgard/compute-upload
Vulkan: Texture upload through compute, experimental texture scaling too
This commit is contained in:
commit
0b17dd04e6
@ -121,9 +121,10 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *all
|
||||
if (initialLayout != VK_IMAGE_LAYOUT_UNDEFINED && initialLayout != VK_IMAGE_LAYOUT_PREINITIALIZED) {
|
||||
switch (initialLayout) {
|
||||
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
|
||||
case VK_IMAGE_LAYOUT_GENERAL:
|
||||
TransitionImageLayout2(cmd, image_, 0, numMips, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
break;
|
||||
default:
|
||||
@ -208,10 +209,10 @@ void VulkanTexture::GenerateMip(VkCommandBuffer cmd, int mip) {
|
||||
VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
}
|
||||
|
||||
void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture) {
|
||||
// Finishes texture creation by transitioning every mip level from `layout` to
// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL so the texture can be sampled.
//
// cmd:           command buffer the barrier is recorded into.
// vertexTexture: if true the texture is first read in the vertex shader stage,
//                otherwise in the fragment shader stage.
// layout:        the layout the image is currently in. The default,
//                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, means the mips were written by
//                transfer operations. Any other layout (VK_IMAGE_LAYOUT_GENERAL) is
//                treated as "written by a compute shader".
void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkImageLayout layout) {
	// The source half of the barrier has to describe the writes that actually produced
	// the texel data. Waiting only on transfer writes would leave compute shader image
	// stores unsynchronized with the sampling stage.
	const bool writtenByTransfer = (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

	TransitionImageLayout2(cmd, image_, 0, numMips_,
		VK_IMAGE_ASPECT_COLOR_BIT,
		layout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		writtenByTransfer ? VK_PIPELINE_STAGE_TRANSFER_BIT : VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
		vertexTexture ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
		writtenByTransfer ? VK_ACCESS_TRANSFER_WRITE_BIT : VK_ACCESS_SHADER_WRITE_BIT,
		VK_ACCESS_SHADER_READ_BIT);
}
|
||||
@ -222,6 +223,26 @@ void VulkanTexture::Touch() {
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a new 2D color image view that covers exactly one mip level (`mip`) and
// array layer 0 of this texture's image, using the texture's format and mapping each
// of the R, G, B and A components to itself.
//
// A fresh view is created on every call; it is not stored in the texture.
// Returns VK_NULL_HANDLE if vkCreateImageView fails (and asserts in debug builds).
VkImageView VulkanTexture::CreateViewForMip(int mip) {
	VkImageViewCreateInfo view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
	view_info.image = image_;
	view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
	view_info.format = format_;
	view_info.components.r = VK_COMPONENT_SWIZZLE_R;
	view_info.components.g = VK_COMPONENT_SWIZZLE_G;
	view_info.components.b = VK_COMPONENT_SWIZZLE_B;
	view_info.components.a = VK_COMPONENT_SWIZZLE_A;
	view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	// Restrict the view to the single requested mip level.
	view_info.subresourceRange.baseMipLevel = mip;
	view_info.subresourceRange.levelCount = 1;
	view_info.subresourceRange.baseArrayLayer = 0;
	view_info.subresourceRange.layerCount = 1;

	// Start from a null handle so a failure never hands back an indeterminate value
	// when asserts are compiled out.
	VkImageView view = VK_NULL_HANDLE;
	VkResult res = vkCreateImageView(vulkan_->GetDevice(), &view_info, nullptr, &view);
	assert(res == VK_SUCCESS);
	if (res != VK_SUCCESS) {
		return VK_NULL_HANDLE;
	}
	return view;
}
|
||||
|
||||
void VulkanTexture::Destroy() {
|
||||
if (view_ != VK_NULL_HANDLE) {
|
||||
vulkan_->Delete().QueueDeleteImageView(view_);
|
||||
|
@ -21,7 +21,11 @@ public:
|
||||
bool CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *allocator, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr);
|
||||
void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels
|
||||
void GenerateMip(VkCommandBuffer cmd, int mip);
|
||||
void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false);
|
||||
void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
|
||||
// When loading mips from compute shaders, you need to pass VK_IMAGE_LAYOUT_GENERAL to the above function.
|
||||
// In addition, ignore UploadMip and GenerateMip, and instead use CreateViewForMip. Make sure to destroy the returned views once you are done with them.
|
||||
VkImageView CreateViewForMip(int mip);
|
||||
|
||||
void Destroy();
|
||||
|
||||
|
@ -23,8 +23,8 @@
|
||||
#include "base/timeutil.h"
|
||||
#include "math/math_util.h"
|
||||
|
||||
VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, size_t size, VkBufferUsageFlags usage)
|
||||
: vulkan_(vulkan), size_(size), usage_(usage) {
|
||||
VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, size_t size, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryPropertyMask)
|
||||
: vulkan_(vulkan), memoryPropertyMask_(memoryPropertyMask), size_(size), usage_(usage) {
|
||||
bool res = AddBuffer();
|
||||
assert(res);
|
||||
}
|
||||
@ -58,7 +58,7 @@ bool VulkanPushBuffer::AddBuffer() {
|
||||
// Okay, that's the buffer. Now let's allocate some memory for it.
|
||||
VkMemoryAllocateInfo alloc{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
|
||||
alloc.allocationSize = reqs.size;
|
||||
vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, &alloc.memoryTypeIndex);
|
||||
vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, memoryPropertyMask_, &alloc.memoryTypeIndex);
|
||||
|
||||
res = vkAllocateMemory(device, &alloc, nullptr, &info.deviceMemory);
|
||||
if (VK_SUCCESS != res) {
|
||||
@ -89,7 +89,8 @@ void VulkanPushBuffer::Destroy(VulkanContext *vulkan) {
|
||||
|
||||
void VulkanPushBuffer::NextBuffer(size_t minSize) {
|
||||
// First, unmap the current memory.
|
||||
Unmap();
|
||||
if (memoryPropertyMask_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
|
||||
Unmap();
|
||||
|
||||
buf_++;
|
||||
if (buf_ >= buffers_.size() || minSize > size_) {
|
||||
@ -108,7 +109,8 @@ void VulkanPushBuffer::NextBuffer(size_t minSize) {
|
||||
|
||||
// Now, move to the next buffer and map it.
|
||||
offset_ = 0;
|
||||
Map();
|
||||
if (memoryPropertyMask_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
|
||||
Map();
|
||||
}
|
||||
|
||||
void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
|
||||
@ -142,14 +144,15 @@ void VulkanPushBuffer::Map() {
|
||||
|
||||
void VulkanPushBuffer::Unmap() {
|
||||
_dbg_assert_(G3D, writePtr_ != 0);
|
||||
/*
|
||||
// Should not need this since we use coherent memory.
|
||||
VkMappedMemoryRange range{ VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE };
|
||||
range.offset = 0;
|
||||
range.size = offset_;
|
||||
range.memory = buffers_[buf_].deviceMemory;
|
||||
vkFlushMappedMemoryRanges(device_, 1, &range);
|
||||
*/
|
||||
|
||||
if ((memoryPropertyMask_ & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == 0) {
|
||||
VkMappedMemoryRange range{ VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE };
|
||||
range.offset = 0;
|
||||
range.size = offset_;
|
||||
range.memory = buffers_[buf_].deviceMemory;
|
||||
vkFlushMappedMemoryRanges(vulkan_->GetDevice(), 1, &range);
|
||||
}
|
||||
|
||||
vkUnmapMemory(vulkan_->GetDevice(), buffers_[buf_].deviceMemory);
|
||||
writePtr_ = nullptr;
|
||||
}
|
||||
|
@ -22,7 +22,10 @@ class VulkanPushBuffer {
|
||||
};
|
||||
|
||||
public:
|
||||
VulkanPushBuffer(VulkanContext *vulkan, size_t size, VkBufferUsageFlags usage);
|
||||
// NOTE: If you create a push buffer with only VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
// then you can't use any of the push functions as pointers will not be reachable from the CPU.
|
||||
// You must in this case use Allocate() only, and pass the returned offset and the VkBuffer to Vulkan APIs.
|
||||
VulkanPushBuffer(VulkanContext *vulkan, size_t size, VkBufferUsageFlags usage, VkMemoryPropertyFlags memoryPropertyMask = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
||||
~VulkanPushBuffer();
|
||||
|
||||
void Destroy(VulkanContext *vulkan);
|
||||
@ -35,15 +38,18 @@ public:
|
||||
offset_ = 0;
|
||||
// Note: we must defrag because some buffers may be smaller than size_.
|
||||
Defragment(vulkan);
|
||||
Map();
|
||||
if (memoryPropertyMask_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
|
||||
Map();
|
||||
}
|
||||
|
||||
void BeginNoReset() {
|
||||
Map();
|
||||
if (memoryPropertyMask_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
|
||||
Map();
|
||||
}
|
||||
|
||||
void End() {
|
||||
Unmap();
|
||||
if (memoryPropertyMask_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
|
||||
Unmap();
|
||||
}
|
||||
|
||||
void Map();
|
||||
@ -109,6 +115,8 @@ private:
|
||||
void Defragment(VulkanContext *vulkan);
|
||||
|
||||
VulkanContext *vulkan_;
|
||||
VkMemoryPropertyFlags memoryPropertyMask_;
|
||||
|
||||
std::vector<BufInfo> buffers_;
|
||||
size_t buf_ = 0;
|
||||
size_t offset_ = 0;
|
||||
|
@ -751,6 +751,7 @@ static ConfigSetting graphicsSettings[] = {
|
||||
ReportedConfigSetting("TexScalingLevel", &g_Config.iTexScalingLevel, 1, true, true),
|
||||
ReportedConfigSetting("TexScalingType", &g_Config.iTexScalingType, 0, true, true),
|
||||
ReportedConfigSetting("TexDeposterize", &g_Config.bTexDeposterize, false, true, true),
|
||||
ReportedConfigSetting("TexHardwareScaling", &g_Config.bTexHardwareScaling, false, true, true),
|
||||
ConfigSetting("VSyncInterval", &g_Config.bVSync, false, true, true),
|
||||
ReportedConfigSetting("BloomHack", &g_Config.iBloomHack, 0, true, true),
|
||||
|
||||
|
@ -170,6 +170,7 @@ public:
|
||||
int iTexScalingLevel; // 0 = auto, 1 = off, 2 = 2x, ..., 5 = 5x
|
||||
int iTexScalingType; // 0 = xBRZ, 1 = Hybrid
|
||||
bool bTexDeposterize;
|
||||
bool bTexHardwareScaling;
|
||||
int iFpsLimit1;
|
||||
int iFpsLimit2;
|
||||
int iMaxRecent;
|
||||
|
@ -153,6 +153,8 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
||||
frame_[i].pushUBO = new VulkanPushBuffer(vulkan_, 8 * 1024 * 1024, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
|
||||
frame_[i].pushVertex = new VulkanPushBuffer(vulkan_, 2 * 1024 * 1024, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
|
||||
frame_[i].pushIndex = new VulkanPushBuffer(vulkan_, 1 * 1024 * 1024, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
|
||||
|
||||
// The third constructor argument is the buffer *usage*; the memory property mask is the fourth.
// Passing VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT as usage (its raw value equals VK_BUFFER_USAGE_TRANSFER_SRC_BIT)
// left the memory mask at its host-visible default and gave the buffer no storage-buffer usage.
// TRANSFER_SRC is kept so the previously effective usage is still available.
frame_[i].pushLocal = new VulkanPushBuffer(vulkan_, 1 * 1024 * 1024, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
}
|
||||
|
||||
VkPipelineLayoutCreateInfo pl{ VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
|
||||
@ -209,6 +211,11 @@ void DrawEngineVulkan::FrameData::Destroy(VulkanContext *vulkan) {
|
||||
delete pushIndex;
|
||||
pushIndex = nullptr;
|
||||
}
|
||||
if (pushLocal) {
|
||||
pushLocal->Destroy(vulkan);
|
||||
delete pushLocal;
|
||||
pushLocal = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void DrawEngineVulkan::DestroyDeviceObjects() {
|
||||
@ -264,10 +271,12 @@ void DrawEngineVulkan::BeginFrame() {
|
||||
frame->pushUBO->Reset();
|
||||
frame->pushVertex->Reset();
|
||||
frame->pushIndex->Reset();
|
||||
frame->pushLocal->Reset();
|
||||
|
||||
frame->pushUBO->Begin(vulkan_);
|
||||
frame->pushVertex->Begin(vulkan_);
|
||||
frame->pushIndex->Begin(vulkan_);
|
||||
frame->pushLocal->Begin(vulkan_);
|
||||
|
||||
// TODO: How can we make this nicer...
|
||||
tessDataTransferVulkan->SetPushBuffer(frame->pushUBO);
|
||||
@ -324,6 +333,7 @@ void DrawEngineVulkan::EndFrame() {
|
||||
frame->pushUBO->End();
|
||||
frame->pushVertex->End();
|
||||
frame->pushIndex->End();
|
||||
frame->pushLocal->End();
|
||||
vertexCache_->End();
|
||||
}
|
||||
|
||||
|
@ -188,6 +188,11 @@ public:
|
||||
return frame_[vulkan_->GetCurFrame()].pushUBO;
|
||||
}
|
||||
|
||||
// Only use Allocate on this one.
|
||||
VulkanPushBuffer *GetPushBufferLocal() {
|
||||
return frame_[vulkan_->GetCurFrame()].pushLocal;
|
||||
}
|
||||
|
||||
const DrawEngineVulkanStats &GetStats() const {
|
||||
return stats_;
|
||||
}
|
||||
@ -257,6 +262,10 @@ private:
|
||||
VulkanPushBuffer *pushUBO = nullptr;
|
||||
VulkanPushBuffer *pushVertex = nullptr;
|
||||
VulkanPushBuffer *pushIndex = nullptr;
|
||||
|
||||
// Special push buffer in GPU local memory, for texture data conversion and similar tasks.
|
||||
VulkanPushBuffer *pushLocal;
|
||||
|
||||
// We do rolling allocation and reset instead of caching across frames. That we might do later.
|
||||
DenseHashMap<DescriptorSetKey, VkDescriptorSet, (VkDescriptorSet)VK_NULL_HANDLE> descSets;
|
||||
|
||||
|
@ -67,6 +67,450 @@ static const VkComponentMapping VULKAN_1555_SWIZZLE = { VK_COMPONENT_SWIZZLE_B,
|
||||
static const VkComponentMapping VULKAN_565_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
|
||||
static const VkComponentMapping VULKAN_8888_SWIZZLE = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
|
||||
|
||||
// 4xBRZ shader - Copyright (C) 2014-2016 DeSmuME team (GPL2+)
|
||||
// Hyllian's xBR-vertex code and texel mapping
|
||||
// Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com
|
||||
// TODO: Handles alpha badly for PSP.
|
||||
const char *shader4xbrz = R"(
|
||||
vec4 premultiply_alpha(vec4 c) { float a = clamp(c.a, 0.0, 1.0); return vec4(c.rgb * a, a); }
|
||||
vec4 postdivide_alpha(vec4 c) { return c.a < 0.001? vec4(0.0,0.0,0.0,0.0) : vec4(c.rgb / c.a, c.a); }
|
||||
|
||||
#define BLEND_ALPHA 1
|
||||
#define BLEND_NONE 0
|
||||
#define BLEND_NORMAL 1
|
||||
#define BLEND_DOMINANT 2
|
||||
#define LUMINANCE_WEIGHT 1.0
|
||||
#define EQUAL_COLOR_TOLERANCE 30.0/255.0
|
||||
#define STEEP_DIRECTION_THRESHOLD 2.2
|
||||
#define DOMINANT_DIRECTION_THRESHOLD 3.6
|
||||
|
||||
float reduce(vec4 color) {
|
||||
return dot(color.rgb, vec3(65536.0, 256.0, 1.0));
|
||||
}
|
||||
|
||||
float DistYCbCr(vec4 pixA, vec4 pixB) {
|
||||
const vec3 w = vec3(0.2627, 0.6780, 0.0593);
|
||||
const float scaleB = 0.5 / (1.0 - w.b);
|
||||
const float scaleR = 0.5 / (1.0 - w.r);
|
||||
vec4 diff = pixA - pixB;
|
||||
float Y = dot(diff.rgb, w);
|
||||
float Cb = scaleB * (diff.b - Y);
|
||||
float Cr = scaleR * (diff.r - Y);
|
||||
|
||||
return sqrt( ((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) + (diff.a * diff.a));
|
||||
}
|
||||
|
||||
bool IsPixEqual(const vec4 pixA, const vec4 pixB) {
|
||||
return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE);
|
||||
}
|
||||
|
||||
bool IsBlendingNeeded(const ivec4 blend) {
|
||||
ivec4 diff = blend - ivec4(BLEND_NONE);
|
||||
return diff.x != 0 || diff.y != 0 || diff.z != 0 || diff.w != 0;
|
||||
}
|
||||
|
||||
vec4 applyScalingf(uvec2 origxy, uvec2 xy) {
|
||||
float dx = 1.0 / params.width;
|
||||
float dy = 1.0 / params.height;
|
||||
|
||||
// A1 B1 C1
|
||||
// A0 A B C C4
|
||||
// D0 D E F F4
|
||||
// G0 G H I I4
|
||||
// G5 H5 I5
|
||||
|
||||
uvec4 t1 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y - 2); // A1 B1 C1
|
||||
uvec4 t2 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y - 1); // A B C
|
||||
uvec4 t3 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 0); // D E F
|
||||
uvec4 t4 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 1); // G H I
|
||||
uvec4 t5 = uvec4(origxy.x - 1, origxy.x, origxy.x + 1, origxy.y + 2); // G5 H5 I5
|
||||
uvec4 t6 = uvec4(origxy.x - 2, origxy.y - 1, origxy.y, origxy.y + 1); // A0 D0 G0
|
||||
uvec4 t7 = uvec4(origxy.x + 2, origxy.y - 1, origxy.y, origxy.y + 1); // C4 F4 I4
|
||||
|
||||
vec2 f = fract(vec2(float(xy.x) / float(params.scale), float(xy.y) / float(params.scale)));
|
||||
|
||||
//---------------------------------------
|
||||
// Input Pixel Mapping: |21|22|23|
|
||||
// 19|06|07|08|09
|
||||
// 18|05|00|01|10
|
||||
// 17|04|03|02|11
|
||||
// |15|14|13|
|
||||
|
||||
vec4 src[25];
|
||||
|
||||
src[21] = premultiply_alpha(readColorf(t1.xw));
|
||||
src[22] = premultiply_alpha(readColorf(t1.yw));
|
||||
src[23] = premultiply_alpha(readColorf(t1.zw));
|
||||
src[ 6] = premultiply_alpha(readColorf(t2.xw));
|
||||
src[ 7] = premultiply_alpha(readColorf(t2.yw));
|
||||
src[ 8] = premultiply_alpha(readColorf(t2.zw));
|
||||
src[ 5] = premultiply_alpha(readColorf(t3.xw));
|
||||
src[ 0] = premultiply_alpha(readColorf(t3.yw));
|
||||
src[ 1] = premultiply_alpha(readColorf(t3.zw));
|
||||
src[ 4] = premultiply_alpha(readColorf(t4.xw));
|
||||
src[ 3] = premultiply_alpha(readColorf(t4.yw));
|
||||
src[ 2] = premultiply_alpha(readColorf(t4.zw));
|
||||
src[15] = premultiply_alpha(readColorf(t5.xw));
|
||||
src[14] = premultiply_alpha(readColorf(t5.yw));
|
||||
src[13] = premultiply_alpha(readColorf(t5.zw));
|
||||
src[19] = premultiply_alpha(readColorf(t6.xy));
|
||||
src[18] = premultiply_alpha(readColorf(t6.xz));
|
||||
src[17] = premultiply_alpha(readColorf(t6.xw));
|
||||
src[ 9] = premultiply_alpha(readColorf(t7.xy));
|
||||
src[10] = premultiply_alpha(readColorf(t7.xz));
|
||||
src[11] = premultiply_alpha(readColorf(t7.xw));
|
||||
|
||||
float v[9];
|
||||
v[0] = reduce(src[0]);
|
||||
v[1] = reduce(src[1]);
|
||||
v[2] = reduce(src[2]);
|
||||
v[3] = reduce(src[3]);
|
||||
v[4] = reduce(src[4]);
|
||||
v[5] = reduce(src[5]);
|
||||
v[6] = reduce(src[6]);
|
||||
v[7] = reduce(src[7]);
|
||||
v[8] = reduce(src[8]);
|
||||
|
||||
ivec4 blendResult = ivec4(BLEND_NONE);
|
||||
|
||||
// Preprocess corners
|
||||
// Pixel Tap Mapping: --|--|--|--|--
|
||||
// --|--|07|08|--
|
||||
// --|05|00|01|10
|
||||
// --|04|03|02|11
|
||||
// --|--|14|13|--
|
||||
// Corner (1, 1)
|
||||
if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) {
|
||||
float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1]));
|
||||
float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2]));
|
||||
bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02;
|
||||
blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
|
||||
}
|
||||
|
||||
// Pixel Tap Mapping: --|--|--|--|--
|
||||
// --|06|07|--|--
|
||||
// 18|05|00|01|--
|
||||
// 17|04|03|02|--
|
||||
// --|15|14|--|--
|
||||
// Corner (0, 1)
|
||||
if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) {
|
||||
float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0]));
|
||||
float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3]));
|
||||
bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00;
|
||||
blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
|
||||
}
|
||||
|
||||
// Pixel Tap Mapping: --|--|22|23|--
|
||||
// --|06|07|08|09
|
||||
// --|05|00|01|10
|
||||
// --|--|03|02|--
|
||||
// --|--|--|--|--
|
||||
// Corner (1, 0)
|
||||
if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) {
|
||||
float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8]));
|
||||
float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1]));
|
||||
bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08;
|
||||
blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
|
||||
}
|
||||
|
||||
// Pixel Tap Mapping: --|21|22|--|--
|
||||
// 19|06|07|08|--
|
||||
// 18|05|00|01|--
|
||||
// --|04|03|--|--
|
||||
// --|--|--|--|--
|
||||
// Corner (0, 0)
|
||||
if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) {
|
||||
float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7]));
|
||||
float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0]));
|
||||
bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00;
|
||||
blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
|
||||
}
|
||||
|
||||
vec4 dst[16];
|
||||
dst[ 0] = src[0];
|
||||
dst[ 1] = src[0];
|
||||
dst[ 2] = src[0];
|
||||
dst[ 3] = src[0];
|
||||
dst[ 4] = src[0];
|
||||
dst[ 5] = src[0];
|
||||
dst[ 6] = src[0];
|
||||
dst[ 7] = src[0];
|
||||
dst[ 8] = src[0];
|
||||
dst[ 9] = src[0];
|
||||
dst[10] = src[0];
|
||||
dst[11] = src[0];
|
||||
dst[12] = src[0];
|
||||
dst[13] = src[0];
|
||||
dst[14] = src[0];
|
||||
dst[15] = src[0];
|
||||
|
||||
// Scale pixel
|
||||
if (IsBlendingNeeded(blendResult) == true) {
|
||||
float dist_01_04 = DistYCbCr(src[1], src[4]);
|
||||
float dist_03_08 = DistYCbCr(src[3], src[8]);
|
||||
bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]);
|
||||
bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]);
|
||||
bool needBlend = (blendResult[2] != BLEND_NONE);
|
||||
bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT ||
|
||||
((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) ||
|
||||
(blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) ||
|
||||
(IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], src[2]) == false) ) == false );
|
||||
|
||||
vec4 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3];
|
||||
dst[ 2] = mix(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
|
||||
dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
|
||||
dst[10] = mix(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
|
||||
dst[11] = mix(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[12] = mix(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
|
||||
dst[13] = mix(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[14] = mix(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
|
||||
dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
|
||||
|
||||
dist_01_04 = DistYCbCr(src[7], src[2]);
|
||||
dist_03_08 = DistYCbCr(src[1], src[6]);
|
||||
haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]);
|
||||
haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]);
|
||||
needBlend = (blendResult[1] != BLEND_NONE);
|
||||
doLineBlend = ( blendResult[1] >= BLEND_DOMINANT ||
|
||||
!((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) ||
|
||||
(blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) ||
|
||||
(IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) );
|
||||
|
||||
blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1];
|
||||
dst[ 1] = mix(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
|
||||
dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
|
||||
dst[ 7] = mix(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
|
||||
dst[ 8] = mix(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[ 9] = mix(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
|
||||
dst[10] = mix(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[11] = mix(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
|
||||
dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
|
||||
|
||||
dist_01_04 = DistYCbCr(src[5], src[8]);
|
||||
dist_03_08 = DistYCbCr(src[7], src[4]);
|
||||
haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]);
|
||||
haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]);
|
||||
needBlend = (blendResult[0] != BLEND_NONE);
|
||||
doLineBlend = ( blendResult[0] >= BLEND_DOMINANT ||
|
||||
!((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) ||
|
||||
(blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) ||
|
||||
(IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) );
|
||||
|
||||
blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7];
|
||||
dst[ 0] = mix(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
|
||||
dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
|
||||
dst[ 4] = mix(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
|
||||
dst[ 5] = mix(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[ 6] = mix(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
|
||||
dst[ 7] = mix(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[ 8] = mix(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
|
||||
dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
|
||||
|
||||
dist_01_04 = DistYCbCr(src[3], src[6]);
|
||||
dist_03_08 = DistYCbCr(src[5], src[2]);
|
||||
haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]);
|
||||
haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]);
|
||||
needBlend = (blendResult[3] != BLEND_NONE);
|
||||
doLineBlend = ( blendResult[3] >= BLEND_DOMINANT ||
|
||||
!((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) ||
|
||||
(blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) ||
|
||||
(IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) );
|
||||
|
||||
blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5];
|
||||
dst[ 3] = mix(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00);
|
||||
dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00);
|
||||
dst[13] = mix(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00);
|
||||
dst[14] = mix(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[15] = mix(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00);
|
||||
dst[ 4] = mix(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00);
|
||||
dst[ 5] = mix(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00);
|
||||
dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00);
|
||||
}
|
||||
|
||||
// select output pixel
|
||||
vec4 res = mix(mix(mix(mix(dst[ 6], dst[ 7], step(0.25, f.x)),
|
||||
mix(dst[ 8], dst[ 9], step(0.75, f.x)),
|
||||
step(0.50, f.x)),
|
||||
mix(mix(dst[ 5], dst[ 0], step(0.25, f.x)),
|
||||
mix(dst[ 1], dst[10], step(0.75, f.x)),
|
||||
step(0.50, f.x)),
|
||||
step(0.25, f.y)),
|
||||
mix(mix(mix(dst[ 4], dst[ 3], step(0.25, f.x)),
|
||||
mix(dst[ 2], dst[11], step(0.75, f.x)),
|
||||
step(0.50, f.x)),
|
||||
mix(mix(dst[15], dst[14], step(0.25, f.x)),
|
||||
mix(dst[13], dst[12], step(0.75, f.x)),
|
||||
step(0.50, f.x)),
|
||||
step(0.75, f.y)),
|
||||
step(0.50, f.y));
|
||||
|
||||
return postdivide_alpha(res);
|
||||
}
|
||||
|
||||
uint applyScalingu(uvec2 origxy, uvec2 xy) {
|
||||
return packUnorm4x8(applyScalingf(origxy, xy));
|
||||
}
|
||||
)";
|
||||
|
||||
const char *copyShader = R"(
|
||||
#version 450
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
|
||||
// No idea what's optimal here...
|
||||
#define WORKGROUP_SIZE 16
|
||||
layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1) in;
|
||||
|
||||
layout(std430, binding = 1) buffer Buf1 {
|
||||
uint data[];
|
||||
} buf1;
|
||||
|
||||
layout(std430, binding = 2) buffer Buf2 {
|
||||
uint data[];
|
||||
} buf2;
|
||||
|
||||
layout(push_constant) uniform Params {
|
||||
int width;
|
||||
int height;
|
||||
int scale;
|
||||
int fmt;
|
||||
} params;
|
||||
|
||||
// Reads the texel at position p from buf1 and returns it as a packed 8-bit-per-channel
// value with R in the lowest byte and A in the highest.
// params.fmt selects the source encoding:
//   0 - one 32-bit texel per uint, returned unchanged
//   6 - 16-bit 565 (alpha forced to 0xFF)
//   5 - 16-bit 5551 (1-bit alpha expanded to 0x00 / 0xFF)
//   4 - 16-bit 4444
// 16-bit texels are packed two per uint. Any other fmt value yields 0.
uint readColoru(uvec2 p) {
	// Note that if the pixels are packed, we can do multiple stores
	// and only launch this compute shader for every N pixels,
	// by slicing the width in half and multiplying x by 2, for example.
	if (params.fmt == 0) {
		return buf1.data[p.y * params.width + p.x];
	} else {
		uint offset = p.y * params.width + p.x;
		uint data = buf1.data[offset / 2];
		// Odd texels live in the upper 16 bits of the uint.
		if ((offset & 1) != 0) {
			data = data >> 16;
		}
		// Each channel is widened to 8 bits by replicating its top bits into the low bits.
		if (params.fmt == 6) {
			uint r = ((data << 3) & 0xF8) | ((data >> 2) & 0x07);
			uint g = ((data >> 3) & 0xFC) | ((data >> 9) & 0x03);
			uint b = ((data >> 8) & 0xF8) | ((data >> 13) & 0x07);
			return 0xFF000000 | (b << 16) | (g << 8) | r;
		} else if (params.fmt == 5) {
			uint r = ((data << 3) & 0xF8) | ((data >> 2) & 0x07);
			uint g = ((data >> 2) & 0xF8) | ((data >> 7) & 0x07);
			uint b = ((data >> 7) & 0xF8) | ((data >> 12) & 0x07);
			uint a = ((data >> 15) & 0x01) == 0 ? 0x00 : 0xFF;
			return (a << 24) | (b << 16) | (g << 8) | r;
		} else if (params.fmt == 4) {
			// The shifted copy must land in the high nibble (mask 0xF0); masking it with 0x0F
			// always produced zero and left red as an unexpanded 4-bit value.
			uint r = (data & 0x0F) | ((data << 4) & 0xF0);
			uint g = (data & 0xF0) | ((data >> 4) & 0x0F);
			uint b = ((data >> 8) & 0x0F) | ((data >> 4) & 0xF0);
			uint a = ((data >> 12) & 0x0F) | ((data >> 8) & 0xF0);
			return (a << 24) | (b << 16) | (g << 8) | r;
		}
	}
	// Unrecognized format: return a defined value instead of falling off the end of the function.
	return 0u;
}
|
||||
|
||||
vec4 readColorf(uvec2 p) {
|
||||
return unpackUnorm4x8(readColoru(p));
|
||||
}
|
||||
|
||||
%s
|
||||
|
||||
void main() {
|
||||
uvec2 xy = gl_GlobalInvocationID.xy;
|
||||
// Kill off any out-of-image threads to avoid stray writes.
|
||||
// Should only happen on the tiniest mipmaps as PSP textures are power-of-2,
|
||||
// and we use a 16x16 workgroup size.
|
||||
if (xy.x >= params.width || xy.y >= params.height)
|
||||
return;
|
||||
|
||||
uvec2 origxy = xy / params.scale;
|
||||
if (params.scale == 1) {
|
||||
buf2.data[xy.y * params.width + xy.x] = readColoru(origxy);
|
||||
} else {
|
||||
buf2.data[xy.y * params.width + xy.x] = applyScalingu(origxy, xy);
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
// GLSL compute shader that decodes texel data from a storage buffer and writes it
// straight into a storage image with imageStore.
//
// NOTE: this string is used as a printf-style format string. It must contain exactly
// one "%s" (where the scaling function source gets spliced in) and no other percent signs.
//
// Push constant "fmt" selects the source texel layout:
//   0 = 32-bit RGBA8888, 4 = 16-bit 4444, 5 = 16-bit 1555, 6 = 16-bit 565.
const char *uploadShader = R"(
#version 450
#extension GL_ARB_separate_shader_objects : enable

// No idea what's optimal here...
#define WORKGROUP_SIZE 16
layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1) in;

uniform layout(binding = 0, rgba8) writeonly image2D img;

layout(std430, binding = 1) buffer Buf {
	uint data[];
} buf;

layout(push_constant) uniform Params {
	int width;
	int height;
	int scale;
	int fmt;
} params;

uint readColoru(uvec2 p) {
	// Note that if the pixels are packed, we can do multiple stores
	// and only launch this compute shader for every N pixels,
	// by slicing the width in half and multiplying x by 2, for example.
	if (params.fmt == 0) {
		return buf.data[p.y * params.width + p.x];
	} else {
		// 16-bit formats: two texels are packed into each 32-bit word.
		uint offset = p.y * params.width + p.x;
		uint data = buf.data[offset / 2];
		if ((offset & 1) != 0) {
			data = data >> 16;
		}
		if (params.fmt == 6) {
			uint r = ((data << 3) & 0xF8) | ((data >> 2) & 0x07);
			uint g = ((data >> 3) & 0xFC) | ((data >> 9) & 0x03);
			uint b = ((data >> 8) & 0xF8) | ((data >> 13) & 0x07);
			return 0xFF000000 | (b << 16) | (g << 8) | r;
		} else if (params.fmt == 5) {
			uint r = ((data << 3) & 0xF8) | ((data >> 2) & 0x07);
			uint g = ((data >> 2) & 0xF8) | ((data >> 7) & 0x07);
			uint b = ((data >> 7) & 0xF8) | ((data >> 12) & 0x07);
			uint a = ((data >> 15) & 0x01) == 0 ? 0x00 : 0xFF;
			return (a << 24) | (b << 16) | (g << 8) | r;
		} else if (params.fmt == 4) {
			// Replicate each nibble into both halves of the 8-bit channel.
			uint r = (data & 0x0F) | ((data << 4) & 0xF0);
			uint g = (data & 0xF0) | ((data >> 4) & 0x0F);
			uint b = ((data >> 8) & 0x0F) | ((data >> 4) & 0xF0);
			uint a = ((data >> 12) & 0x0F) | ((data >> 8) & 0xF0);
			return (a << 24) | (b << 16) | (g << 8) | r;
		}
	}
	// Unknown format code: return a defined value instead of falling off the end.
	return 0u;
}

vec4 readColorf(uvec2 p) {
	// Unpack the color (we could look it up in a CLUT here if we wanted...)
	// It's a bit silly that we need to unpack to float and then have imageStore repack,
	// but the alternative is to store to a buffer, and then launch a vkCmdCopyBufferToImage instead.
	return unpackUnorm4x8(readColoru(p));
}

%s

void main() {
	uvec2 xy = gl_GlobalInvocationID.xy;
	// Kill off any out-of-image threads to avoid stray writes.
	// Should only happen on the tiniest mipmaps as PSP textures are power-of-2,
	// and we use a 16x16 workgroup size.
	if (xy.x >= params.width || xy.y >= params.height)
		return;

	uvec2 origxy = xy / params.scale;
	if (params.scale == 1) {
		imageStore(img, ivec2(xy.x, xy.y), readColorf(origxy));
	} else {
		imageStore(img, ivec2(xy.x, xy.y), applyScalingf(origxy, xy));
	}
}
)";
|
||||
|
||||
// Destruction goes through the same path as losing the device.
SamplerCache::~SamplerCache() {
	this->DeviceLost();
}
|
||||
@ -141,7 +585,8 @@ std::vector<std::string> SamplerCache::DebugGetSamplerIDs() const {
|
||||
TextureCacheVulkan::TextureCacheVulkan(Draw::DrawContext *draw, VulkanContext *vulkan)
|
||||
: TextureCacheCommon(draw),
|
||||
vulkan_(vulkan),
|
||||
samplerCache_(vulkan) {
|
||||
samplerCache_(vulkan),
|
||||
computeShaderManager_(vulkan) {
|
||||
timesInvalidatedAllThisFrame_ = 0;
|
||||
DeviceRestore(vulkan, draw);
|
||||
SetupTextureDecoder();
|
||||
@ -180,6 +625,13 @@ void TextureCacheVulkan::DeviceLost() {
|
||||
if (samplerNearest_)
|
||||
vulkan_->Delete().QueueDeleteSampler(samplerNearest_);
|
||||
|
||||
if (uploadCS_ != VK_NULL_HANDLE)
|
||||
vulkan_->Delete().QueueDeleteShaderModule(uploadCS_);
|
||||
if (copyCS_ != VK_NULL_HANDLE)
|
||||
vulkan_->Delete().QueueDeleteShaderModule(copyCS_);
|
||||
|
||||
computeShaderManager_.DeviceLost();
|
||||
|
||||
nextTexture_ = nullptr;
|
||||
}
|
||||
|
||||
@ -200,6 +652,19 @@ void TextureCacheVulkan::DeviceRestore(VulkanContext *vulkan, Draw::DrawContext
|
||||
samp.minFilter = VK_FILTER_NEAREST;
|
||||
samp.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
vkCreateSampler(vulkan_->GetDevice(), &samp, nullptr, &samplerNearest_);
|
||||
|
||||
std::string error;
|
||||
std::string fullUploadShader = StringFromFormat(uploadShader, shader4xbrz);
|
||||
std::string fullCopyShader = StringFromFormat(copyShader, shader4xbrz);
|
||||
|
||||
if (g_Config.bTexHardwareScaling) {
|
||||
uploadCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, fullUploadShader.c_str(), &error);
|
||||
_dbg_assert_msg_(G3D, uploadCS_ != VK_NULL_HANDLE, "failed to compile upload shader");
|
||||
copyCS_ = CompileShaderModule(vulkan_, VK_SHADER_STAGE_COMPUTE_BIT, fullCopyShader.c_str(), &error);
|
||||
_dbg_assert_msg_(G3D, copyCS_!= VK_NULL_HANDLE, "failed to compile copy shader");
|
||||
}
|
||||
|
||||
computeShaderManager_.DeviceRestore(vulkan);
|
||||
}
|
||||
|
||||
void TextureCacheVulkan::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
|
||||
@ -272,10 +737,12 @@ void TextureCacheVulkan::StartFrame() {
|
||||
}
|
||||
|
||||
allocator_->Begin();
|
||||
computeShaderManager_.BeginFrame();
|
||||
}
|
||||
|
||||
void TextureCacheVulkan::EndFrame() {
|
||||
allocator_->End();
|
||||
computeShaderManager_.EndFrame();
|
||||
|
||||
if (texelsScaledThisFrame_) {
|
||||
// INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
|
||||
@ -570,14 +1037,14 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
||||
// Don't scale the PPGe texture.
|
||||
if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
|
||||
scaleFactor = 1;
|
||||
if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1) {
|
||||
if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1 && !g_Config.bTexHardwareScaling) {
|
||||
// Remember for later that we /wanted/ to scale this texture.
|
||||
entry->status |= TexCacheEntry::STATUS_TO_SCALE;
|
||||
scaleFactor = 1;
|
||||
}
|
||||
|
||||
if (scaleFactor != 1) {
|
||||
if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
|
||||
if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED && !g_Config.bTexHardwareScaling) {
|
||||
entry->status |= TexCacheEntry::STATUS_TO_SCALE;
|
||||
scaleFactor = 1;
|
||||
} else {
|
||||
@ -597,6 +1064,9 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
||||
actualFmt = ToVulkanFormat(replaced.Format(0));
|
||||
}
|
||||
|
||||
bool computeUpload = false;
|
||||
bool computeCopy = false;
|
||||
|
||||
{
|
||||
delete entry->vkTex;
|
||||
entry->vkTex = new VulkanTexture(vulkan_);
|
||||
@ -621,11 +1091,29 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
||||
break;
|
||||
}
|
||||
|
||||
VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
// If we want to use the GE debugger, we should add VK_IMAGE_USAGE_TRANSFER_SRC_BIT too...
|
||||
|
||||
// Compute experiment
|
||||
if (actualFmt == VULKAN_8888_FORMAT && scaleFactor > 1 && g_Config.bTexHardwareScaling) {
|
||||
// Enable the experiment you want.
|
||||
if (uploadCS_ != VK_NULL_HANDLE)
|
||||
computeUpload = true;
|
||||
else if (copyCS_ != VK_NULL_HANDLE)
|
||||
computeCopy = true;
|
||||
}
|
||||
|
||||
if (computeUpload) {
|
||||
usage |= VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
imageLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
char texName[128]{};
|
||||
snprintf(texName, sizeof(texName), "Texture%08x", entry->addr);
|
||||
image->SetTag(texName);
|
||||
|
||||
bool allocSuccess = image->CreateDirect(cmdInit, allocator_, w * scaleFactor, h * scaleFactor, maxLevel + 1, actualFmt, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, mapping);
|
||||
bool allocSuccess = image->CreateDirect(cmdInit, allocator_, w * scaleFactor, h * scaleFactor, maxLevel + 1, actualFmt, imageLayout, usage, mapping);
|
||||
if (!allocSuccess && !lowMemoryMode_) {
|
||||
WARN_LOG_REPORT(G3D, "Texture cache ran out of GPU memory; switching to low memory mode");
|
||||
lowMemoryMode_ = true;
|
||||
@ -677,6 +1165,9 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
||||
if (replaced.Valid()) {
|
||||
replaced.GetSize(i, mipWidth, mipHeight);
|
||||
}
|
||||
int srcBpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2;
|
||||
int srcStride = mipWidth * srcBpp;
|
||||
int srcSize = srcStride * mipHeight;
|
||||
int bpp = actualFmt == VULKAN_8888_FORMAT ? 4 : 2;
|
||||
int stride = (mipWidth * bpp + 15) & ~15;
|
||||
int size = stride * mipHeight;
|
||||
@ -684,22 +1175,85 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
||||
VkBuffer texBuf;
|
||||
// nvidia returns 1 but that can't be healthy... let's align by 16 as a minimum.
|
||||
int pushAlignment = std::max(16, (int)vulkan_->GetPhysicalDeviceProperties().properties.limits.optimalBufferCopyOffsetAlignment);
|
||||
void *data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
|
||||
void *data;
|
||||
bool dataScaled = true;
|
||||
if (replaced.Valid()) {
|
||||
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
|
||||
replaced.Load(i, data, stride);
|
||||
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
|
||||
} else {
|
||||
auto dispatchCompute = [&](VkDescriptorSet descSet) {
|
||||
struct Params { int x; int y; int s; int fmt; } params{ mipWidth, mipHeight, scaleFactor, 0 };
|
||||
if (dstFmt == VULKAN_4444_FORMAT) {
|
||||
params.fmt = 4;
|
||||
} else if (dstFmt == VULKAN_1555_FORMAT) {
|
||||
params.fmt = 5;
|
||||
} else if (dstFmt == VULKAN_565_FORMAT) {
|
||||
params.fmt = 6;
|
||||
}
|
||||
vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr);
|
||||
vkCmdPushConstants(cmdInit, computeShaderManager_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), ¶ms);
|
||||
vkCmdDispatch(cmdInit, (mipWidth + 15) / 16, (mipHeight + 15) / 16, 1);
|
||||
};
|
||||
|
||||
if (fakeMipmap) {
|
||||
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
|
||||
LoadTextureLevel(*entry, (uint8_t *)data, stride, level, scaleFactor, dstFmt);
|
||||
entry->vkTex->UploadMip(cmdInit, 0, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
|
||||
break;
|
||||
} else {
|
||||
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt);
|
||||
if (computeUpload) {
|
||||
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(srcSize, &bufferOffset, &texBuf, pushAlignment);
|
||||
dataScaled = false;
|
||||
LoadTextureLevel(*entry, (uint8_t *)data, srcStride, i, 1, dstFmt);
|
||||
// This format can be used with storage images.
|
||||
VkImageView view = entry->vkTex->CreateViewForMip(i);
|
||||
VkDescriptorSet descSet = computeShaderManager_.GetDescriptorSet(view, texBuf, bufferOffset, srcSize);
|
||||
vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipeline(uploadCS_));
|
||||
dispatchCompute(descSet);
|
||||
vulkan_->Delete().QueueDeleteImageView(view);
|
||||
} else if (computeCopy) {
|
||||
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(srcSize, &bufferOffset, &texBuf, pushAlignment);
|
||||
dataScaled = false;
|
||||
LoadTextureLevel(*entry, (uint8_t *)data, srcStride, i, 1, dstFmt);
|
||||
// Simple test of using a "copy shader" before the upload. This one could unswizzle or whatever
|
||||
// and will work for any texture format including 16-bit as long as the shader is written to pack it into int32 size bits
|
||||
// which is the smallest possible write.
|
||||
VkBuffer localBuf;
|
||||
uint32_t localOffset;
|
||||
uint32_t localSize = size;
|
||||
localOffset = (uint32_t)drawEngine_->GetPushBufferLocal()->Allocate(localSize, &localBuf);
|
||||
|
||||
VkDescriptorSet descSet = computeShaderManager_.GetDescriptorSet(VK_NULL_HANDLE, texBuf, bufferOffset, srcSize, localBuf, localOffset, localSize);
|
||||
vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipeline(copyCS_));
|
||||
dispatchCompute(descSet);
|
||||
|
||||
// After the compute, before the copy, we need a memory barrier.
|
||||
VkBufferMemoryBarrier barrier{ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||
barrier.buffer = localBuf;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.offset = localOffset;
|
||||
barrier.size = localSize;
|
||||
vkCmdPipelineBarrier(cmdInit, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, 0, nullptr, 1, &barrier, 0, nullptr);
|
||||
|
||||
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, localBuf, localOffset, stride / bpp);
|
||||
} else {
|
||||
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
|
||||
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt);
|
||||
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
|
||||
}
|
||||
}
|
||||
if (replacer_.Enabled()) {
|
||||
replacer_.NotifyTextureDecoded(replacedInfo, data, stride, i, mipWidth, mipHeight);
|
||||
// When hardware texture scaling is enabled, this saves the original.
|
||||
int w = dataScaled ? mipWidth : mipWidth / scaleFactor;
|
||||
int h = dataScaled ? mipHeight : mipHeight / scaleFactor;
|
||||
replacer_.NotifyTextureDecoded(replacedInfo, data, stride, i, w, h);
|
||||
}
|
||||
}
|
||||
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
|
||||
}
|
||||
|
||||
if (maxLevel == 0) {
|
||||
@ -710,7 +1264,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
||||
if (replaced.Valid()) {
|
||||
entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
|
||||
}
|
||||
entry->vkTex->EndCreate(cmdInit);
|
||||
entry->vkTex->EndCreate(cmdInit, false, computeUpload ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
}
|
||||
|
||||
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "Common/Vulkan/VulkanContext.h"
|
||||
#include "GPU/Vulkan/TextureScalerVulkan.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
#include "GPU/Vulkan/VulkanUtil.h"
|
||||
|
||||
struct VirtualFramebuffer;
|
||||
class FramebufferManagerVulkan;
|
||||
@ -126,6 +127,8 @@ private:
|
||||
VulkanDeviceAllocator *allocator_ = nullptr;
|
||||
VulkanPushBuffer *push_ = nullptr;
|
||||
|
||||
VulkanComputeShaderManager computeShaderManager_;
|
||||
|
||||
SamplerCache samplerCache_;
|
||||
|
||||
TextureScalerVulkan scaler;
|
||||
@ -142,6 +145,9 @@ private:
|
||||
DrawEngineVulkan *drawEngine_;
|
||||
Vulkan2D *vulkan2D_;
|
||||
|
||||
VkShaderModule uploadCS_ = VK_NULL_HANDLE;
|
||||
VkShaderModule copyCS_ = VK_NULL_HANDLE;
|
||||
|
||||
// Bound state to emulate an API similar to the others
|
||||
VkImageView imageView_ = VK_NULL_HANDLE;
|
||||
VkSampler curSampler_ = VK_NULL_HANDLE;
|
||||
|
@ -16,6 +16,7 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "base/basictypes.h"
|
||||
#include "base/stringutil.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Common/Vulkan/VulkanContext.h"
|
||||
#include "GPU/Vulkan/VulkanUtil.h"
|
||||
@ -51,17 +52,14 @@ void Vulkan2D::DestroyDeviceObjects() {
|
||||
VkDevice device = vulkan_->GetDevice();
|
||||
if (descriptorSetLayout_ != VK_NULL_HANDLE) {
|
||||
vulkan_->Delete().QueueDeleteDescriptorSetLayout(descriptorSetLayout_);
|
||||
descriptorSetLayout_ = VK_NULL_HANDLE;
|
||||
}
|
||||
if (pipelineLayout_ != VK_NULL_HANDLE) {
|
||||
vulkan_->Delete().QueueDeletePipelineLayout(pipelineLayout_);
|
||||
pipelineLayout_ = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// pipelineBasicTex_ and pipelineBasicTex_ come from vulkan2D_.
|
||||
if (pipelineCache_ != VK_NULL_HANDLE) {
|
||||
vulkan_->Delete().QueueDeletePipelineCache(pipelineCache_);
|
||||
pipelineCache_ = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -388,7 +386,7 @@ VkShaderModule CompileShaderModule(VulkanContext *vulkan, VkShaderStageFlagBits
|
||||
ERROR_LOG(G3D, "Error in shader compilation!");
|
||||
}
|
||||
ERROR_LOG(G3D, "Messages: %s", error->c_str());
|
||||
ERROR_LOG(G3D, "Shader source:\n%s", code);
|
||||
ERROR_LOG(G3D, "Shader source:\n%s", LineNumberString(code).c_str());
|
||||
OutputDebugStringUTF8("Messages:\n");
|
||||
OutputDebugStringUTF8(error->c_str());
|
||||
return VK_NULL_HANDLE;
|
||||
@ -401,3 +399,169 @@ VkShaderModule CompileShaderModule(VulkanContext *vulkan, VkShaderStageFlagBits
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only stores the context and constructs the pipeline map; no Vulkan objects are
// created here (that happens in InitDeviceObjects).
// NOTE(review): the 8 passed to pipelines_ is presumably an initial capacity - confirm against DenseHashMap.
VulkanComputeShaderManager::VulkanComputeShaderManager(VulkanContext *vulkan)
	: vulkan_(vulkan),
	  pipelines_(8) {
}
|
||||
// Nothing is released here; device objects are torn down in DestroyDeviceObjects().
VulkanComputeShaderManager::~VulkanComputeShaderManager() = default;
|
||||
|
||||
void VulkanComputeShaderManager::InitDeviceObjects() {
|
||||
VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO };
|
||||
VkResult res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_);
|
||||
assert(VK_SUCCESS == res);
|
||||
|
||||
VkDescriptorSetLayoutBinding bindings[3] = {};
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[0].binding = 0;
|
||||
bindings[1].descriptorCount = 1;
|
||||
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[1].binding = 1;
|
||||
bindings[2].descriptorCount = 1;
|
||||
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
bindings[2].binding = 2;
|
||||
|
||||
VkDevice device = vulkan_->GetDevice();
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
|
||||
dsl.bindingCount = ARRAY_SIZE(bindings);
|
||||
dsl.pBindings = bindings;
|
||||
res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_);
|
||||
assert(VK_SUCCESS == res);
|
||||
|
||||
VkDescriptorPoolSize dpTypes[2];
|
||||
dpTypes[0].descriptorCount = 8192;
|
||||
dpTypes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
dpTypes[1].descriptorCount = 4096;
|
||||
dpTypes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
|
||||
VkDescriptorPoolCreateInfo dp = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
|
||||
dp.flags = 0; // Don't want to mess around with individually freeing these, let's go fixed each frame and zap the whole array. Might try the dynamic approach later.
|
||||
dp.maxSets = 4096; // GTA can end up creating more than 1000 textures in the first frame!
|
||||
dp.pPoolSizes = dpTypes;
|
||||
dp.poolSizeCount = ARRAY_SIZE(dpTypes);
|
||||
for (int i = 0; i < ARRAY_SIZE(frameData_); i++) {
|
||||
VkResult res = vkCreateDescriptorPool(vulkan_->GetDevice(), &dp, nullptr, &frameData_[i].descPool);
|
||||
assert(VK_SUCCESS == res);
|
||||
}
|
||||
|
||||
VkPushConstantRange push = {};
|
||||
push.offset = 0;
|
||||
push.size = 16;
|
||||
push.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
|
||||
pl.pPushConstantRanges = &push;
|
||||
pl.pushConstantRangeCount = 1;
|
||||
pl.setLayoutCount = 1;
|
||||
pl.pSetLayouts = &descriptorSetLayout_;
|
||||
pl.flags = 0;
|
||||
res = vkCreatePipelineLayout(device, &pl, nullptr, &pipelineLayout_);
|
||||
assert(VK_SUCCESS == res);
|
||||
}
|
||||
|
||||
void VulkanComputeShaderManager::DestroyDeviceObjects() {
|
||||
for (int i = 0; i < ARRAY_SIZE(frameData_); i++) {
|
||||
vulkan_->Delete().QueueDeleteDescriptorPool(frameData_[i].descPool);
|
||||
}
|
||||
if (descriptorSetLayout_) {
|
||||
vulkan_->Delete().QueueDeleteDescriptorSetLayout(descriptorSetLayout_);
|
||||
}
|
||||
pipelines_.Iterate([&](const PipelineKey &key, VkPipeline pipeline) {
|
||||
vulkan_->Delete().QueueDeletePipeline(pipeline);
|
||||
});
|
||||
pipelines_.Clear();
|
||||
|
||||
if (pipelineLayout_) {
|
||||
vulkan_->Delete().QueueDeletePipelineLayout(pipelineLayout_);
|
||||
}
|
||||
if (pipelineCache_ != VK_NULL_HANDLE) {
|
||||
vulkan_->Delete().QueueDeletePipelineCache(pipelineCache_);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a descriptor set from the current frame's pool and fills in the bindings.
//
// image:   optional storage image view, written to binding 0 when non-null.
// buffer / offset / range:    storage buffer region, always written to binding 1.
// buffer2 / offset2 / range2: optional second storage buffer region, written to binding 2.
//
// The set is not cached; the pool it comes from is reset in BeginFrame().
VkDescriptorSet VulkanComputeShaderManager::GetDescriptorSet(VkImageView image, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range, VkBuffer buffer2, VkDeviceSize offset2, VkDeviceSize range2) {
	int curFrame = vulkan_->GetCurFrame();
	FrameData &frameData = frameData_[curFrame];
	frameData.numDescriptors++;

	VkDescriptorSet desc;
	VkDescriptorSetAllocateInfo descAlloc = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
	descAlloc.pSetLayouts = &descriptorSetLayout_;
	descAlloc.descriptorPool = frameData.descPool;
	descAlloc.descriptorSetCount = 1;
	VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc);
	assert(result == VK_SUCCESS);

	// Up to three writes can be issued (image + buffer + buffer2), so the array
	// must have room for all of them.
	VkWriteDescriptorSet writes[3]{};
	int n = 0;
	VkDescriptorImageInfo imageInfo = {};
	VkDescriptorBufferInfo bufferInfo[2] = {};
	if (image) {
		imageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
		imageInfo.imageView = image;
		imageInfo.sampler = VK_NULL_HANDLE;
		writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		writes[n].dstBinding = 0;
		writes[n].pImageInfo = &imageInfo;
		writes[n].descriptorCount = 1;
		writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
		writes[n].dstSet = desc;
		n++;
	}
	bufferInfo[0].buffer = buffer;
	bufferInfo[0].offset = offset;
	bufferInfo[0].range = range;
	writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	writes[n].dstBinding = 1;
	writes[n].pBufferInfo = &bufferInfo[0];
	writes[n].descriptorCount = 1;
	writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
	writes[n].dstSet = desc;
	n++;
	if (buffer2) {
		bufferInfo[1].buffer = buffer2;
		bufferInfo[1].offset = offset2;
		bufferInfo[1].range = range2;
		writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		writes[n].dstBinding = 2;
		writes[n].pBufferInfo = &bufferInfo[1];
		writes[n].descriptorCount = 1;
		writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		writes[n].dstSet = desc;
		n++;
	}
	vkUpdateDescriptorSets(vulkan_->GetDevice(), n, writes, 0, nullptr);
	return desc;
}
|
||||
|
||||
// Returns the compute pipeline for the given shader module, creating and caching it
// on first use. The entry point is "main" and the shared pipeline layout is used.
// Returns VK_NULL_HANDLE (and caches nothing) if pipeline creation fails.
VkPipeline VulkanComputeShaderManager::GetPipeline(VkShaderModule cs) {
	PipelineKey key{ cs };
	VkPipeline pipeline = pipelines_.Get(key);
	if (pipeline)
		return pipeline;

	VkComputePipelineCreateInfo pci{ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO };
	pci.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
	pci.stage.module = cs;
	pci.stage.pName = "main";
	pci.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
	pci.layout = pipelineLayout_;
	pci.flags = 0;

	VkResult res = vkCreateComputePipelines(vulkan_->GetDevice(), pipelineCache_, 1, &pci, nullptr, &pipeline);
	if (res != VK_SUCCESS) {
		// Don't cache a failed pipeline, so a later call can retry.
		ERROR_LOG(G3D, "Failed to create compute pipeline (VkResult %d)", static_cast<int>(res));
		return VK_NULL_HANDLE;
	}

	pipelines_.Insert(key, pipeline);
	return pipeline;
}
|
||||
|
||||
void VulkanComputeShaderManager::BeginFrame() {
|
||||
int curFrame = vulkan_->GetCurFrame();
|
||||
FrameData &frame = frameData_[curFrame];
|
||||
frameData_[curFrame].numDescriptors = 0;
|
||||
vkResetDescriptorPool(vulkan_->GetDevice(), frame.descPool, 0);
|
||||
}
|
||||
|
||||
// Counterpart to BeginFrame(); currently there is nothing to do at the end of a frame.
void VulkanComputeShaderManager::EndFrame() {
	return;
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <tuple>
|
||||
#include <map>
|
||||
|
||||
#include "Common/Hashmaps.h"
|
||||
#include "Common/Vulkan/VulkanContext.h"
|
||||
#include "Common/Vulkan/VulkanLoader.h"
|
||||
#include "Common/Vulkan/VulkanImage.h"
|
||||
@ -124,5 +125,52 @@ private:
|
||||
std::vector<VkPipeline> keptPipelines_;
|
||||
};
|
||||
|
||||
// Manager for compute shaders that upload things (and those have two bindings: a storage buffer to read from and an image to write to).
|
||||
class VulkanComputeShaderManager {
|
||||
public:
|
||||
VulkanComputeShaderManager(VulkanContext *vulkan);
|
||||
~VulkanComputeShaderManager();
|
||||
|
||||
void DeviceLost() {
|
||||
DestroyDeviceObjects();
|
||||
}
|
||||
void DeviceRestore(VulkanContext *vulkan) {
|
||||
vulkan_ = vulkan;
|
||||
InitDeviceObjects();
|
||||
}
|
||||
|
||||
// Note: This doesn't cache. The descriptor is for immediate use only.
|
||||
VkDescriptorSet GetDescriptorSet(VkImageView image, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range, VkBuffer buffer2 = VK_NULL_HANDLE, VkDeviceSize offset2 = 0, VkDeviceSize range2 = 0);
|
||||
|
||||
// This of course caches though.
|
||||
VkPipeline GetPipeline(VkShaderModule cs);
|
||||
VkPipelineLayout GetPipelineLayout() const { return pipelineLayout_; }
|
||||
|
||||
void BeginFrame();
|
||||
void EndFrame();
|
||||
|
||||
private:
|
||||
void InitDeviceObjects();
|
||||
void DestroyDeviceObjects();
|
||||
|
||||
VulkanContext *vulkan_ = nullptr;
|
||||
VkPipelineCache cache_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptorSetLayout_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout pipelineLayout_ = VK_NULL_HANDLE;
|
||||
VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
|
||||
|
||||
struct FrameData {
|
||||
VkDescriptorPool descPool;
|
||||
int numDescriptors;
|
||||
};
|
||||
FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];
|
||||
|
||||
struct PipelineKey {
|
||||
VkShaderModule module;
|
||||
};
|
||||
|
||||
DenseHashMap<PipelineKey, VkPipeline, (VkPipeline)VK_NULL_HANDLE> pipelines_;
|
||||
};
|
||||
|
||||
|
||||
VkShaderModule CompileShaderModule(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code, std::string *error);
|
||||
|
Loading…
x
Reference in New Issue
Block a user