diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c index 7a44b87a84..2bba8d71e6 100644 --- a/gfx/common/vulkan_common.c +++ b/gfx/common/vulkan_common.c @@ -265,6 +265,18 @@ void vulkan_sync_texture_to_cpu(vk_t *vk, const struct vk_texture *tex) vkInvalidateMappedMemoryRanges(vk->context->device, 1, &range); } +static unsigned vulkan_num_miplevels(unsigned width, unsigned height) +{ + unsigned size = MAX(width, height); + unsigned levels = 0; + while (size) + { + levels++; + size >>= 1; + } + return levels; +} + struct vk_texture vulkan_create_texture(vk_t *vk, struct vk_texture *old, unsigned width, unsigned height, @@ -284,6 +296,7 @@ struct vk_texture vulkan_create_texture(vk_t *vk, VkCommandBufferAllocateInfo cmd_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO }; VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + unsigned i; memset(&tex, 0, sizeof(tex)); @@ -292,8 +305,19 @@ struct vk_texture vulkan_create_texture(vk_t *vk, info.extent.width = width; info.extent.height = height; info.extent.depth = 1; - info.mipLevels = 1; info.arrayLayers = 1; + + /* For simplicity, always build mipmaps for + * static textures, samplers can be used to enable it dynamically. 
+ */ + if (type == VULKAN_TEXTURE_STATIC) + { + info.mipLevels = vulkan_num_miplevels(width, height); + tex.mipmap = true; + } + else + info.mipLevels = 1; + info.samples = VK_SAMPLE_COUNT_1_BIT; if (type == VULKAN_TEXTURE_STREAMED) @@ -317,7 +341,9 @@ struct vk_texture vulkan_create_texture(vk_t *vk, case VULKAN_TEXTURE_STATIC: retro_assert(initial && "Static textures must have initial data.\n"); info.tiling = VK_IMAGE_TILING_OPTIMAL; - info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; break; @@ -462,7 +488,7 @@ struct vk_texture vulkan_create_texture(vk_t *vk, view.components.a = VK_COMPONENT_SWIZZLE_A; } view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - view.subresourceRange.levelCount = 1; + view.subresourceRange.levelCount = info.mipLevels; view.subresourceRange.layerCount = 1; vkCreateImageView(device, &view, NULL, &tex.view); @@ -526,8 +552,13 @@ struct vk_texture vulkan_create_texture(vk_t *vk, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + /* If doing mipmapping on upload, keep in general so we can easily do transfers to + * and transfers from the images without having to + * mess around with lots of extra transitions at per-level granularity. + */ vulkan_image_layout_transition(vk, staging, tex.image, - VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_UNDEFINED, + tex.mipmap ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -542,16 +573,69 @@ struct vk_texture vulkan_create_texture(vk_t *vk, vkCmdCopyImage(staging, tmp.image, VK_IMAGE_LAYOUT_GENERAL, - tex.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + tex.image, + tex.mipmap ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region); - vulkan_image_layout_transition(vk, staging, tex.image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + if (tex.mipmap) + { + for (i = 1; i < info.mipLevels; i++) + { + VkImageBlit blit_region; + unsigned src_width = MAX(width >> (i - 1), 1); + unsigned src_height = MAX(height >> (i - 1), 1); + unsigned target_width = MAX(width >> i, 1); + unsigned target_height = MAX(height >> i, 1); + memset(&blit_region, 0, sizeof(blit_region)); + + blit_region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blit_region.srcSubresource.mipLevel = i - 1; + blit_region.srcSubresource.baseArrayLayer = 0; + blit_region.srcSubresource.layerCount = 1; + blit_region.dstSubresource = blit_region.srcSubresource; + blit_region.dstSubresource.mipLevel = i; + blit_region.srcOffsets[1].x = src_width; + blit_region.srcOffsets[1].y = src_height; + blit_region.srcOffsets[1].z = 1; + blit_region.dstOffsets[1].x = target_width; + blit_region.dstOffsets[1].y = target_height; + blit_region.dstOffsets[1].z = 1; + + /* Only injects execution and memory barriers, + * not actual transition. */ + vulkan_image_layout_transition(vk, staging, tex.image, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + vkCmdBlitImage(staging, + tex.image, VK_IMAGE_LAYOUT_GENERAL, + tex.image, VK_IMAGE_LAYOUT_GENERAL, + 1, &blit_region, VK_FILTER_LINEAR); + } + + /* Complete our texture. 
*/ + vulkan_image_layout_transition(vk, staging, tex.image, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } + else + { + vulkan_image_layout_transition(vk, staging, tex.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } vkEndCommandBuffer(staging); submit_info.commandBufferCount = 1; @@ -1627,6 +1711,7 @@ bool vulkan_context_init(gfx_ctx_vulkan_data_t *vk, info.pfnCallback = vulkan_debug_cb; vkCreateDebugReportCallbackEXT(vk->context.instance, &info, NULL, &vk->context.debug_callback); } + RARCH_LOG("[Vulkan]: Enabling Vulkan debug layers.\n"); #endif /* Try different API versions if driver has compatible diff --git a/gfx/common/vulkan_common.h b/gfx/common/vulkan_common.h index b182d5eb8b..a999be006a 100644 --- a/gfx/common/vulkan_common.h +++ b/gfx/common/vulkan_common.h @@ -173,6 +173,7 @@ struct vk_texture enum vk_texture_type type; bool default_smooth; bool need_manual_cache_management; + bool mipmap; }; struct vk_buffer @@ -352,6 +353,8 @@ typedef struct vk { VkSampler linear; VkSampler nearest; + VkSampler mipmap_nearest; + VkSampler mipmap_linear; } samplers; unsigned last_valid_index; diff --git a/gfx/drivers/vulkan.c b/gfx/drivers/vulkan.c index 4c788ac69f..15d31308f9 100644 --- a/gfx/drivers/vulkan.c +++ b/gfx/drivers/vulkan.c @@ -483,16 +483,31 @@ static void vulkan_init_samplers(vk_t *vk) vkCreateSampler(vk->context->device, &info, NULL, &vk->samplers.nearest); - info.magFilter = VK_FILTER_LINEAR; - info.minFilter = VK_FILTER_LINEAR; + info.magFilter = VK_FILTER_LINEAR; + info.minFilter = VK_FILTER_LINEAR; vkCreateSampler(vk->context->device, &info, NULL, &vk->samplers.linear); + + info.maxLod = VK_LOD_CLAMP_NONE; 
+ info.magFilter = VK_FILTER_NEAREST; + info.minFilter = VK_FILTER_NEAREST; + info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + vkCreateSampler(vk->context->device, + &info, NULL, &vk->samplers.mipmap_nearest); + + info.magFilter = VK_FILTER_LINEAR; + info.minFilter = VK_FILTER_LINEAR; + info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + vkCreateSampler(vk->context->device, + &info, NULL, &vk->samplers.mipmap_linear); } static void vulkan_deinit_samplers(vk_t *vk) { vkDestroySampler(vk->context->device, vk->samplers.nearest, NULL); vkDestroySampler(vk->context->device, vk->samplers.linear, NULL); + vkDestroySampler(vk->context->device, vk->samplers.mipmap_nearest, NULL); + vkDestroySampler(vk->context->device, vk->samplers.mipmap_linear, NULL); } static void vulkan_init_buffers(vk_t *vk) @@ -651,6 +666,8 @@ static bool vulkan_init_default_filter_chain(vk_t *vk) info.gpu = vk->context->gpu; info.memory_properties = &vk->context->memory_properties; info.pipeline_cache = vk->pipelines.cache; + info.queue = vk->context->queue; + info.command_pool = vk->swapchain[vk->context->current_swapchain_index].cmd_pool; info.max_input_size.width = vk->tex_w; info.max_input_size.height = vk->tex_h; info.swapchain.viewport = vk->vk_vp; @@ -683,6 +700,8 @@ static bool vulkan_init_filter_chain_preset(vk_t *vk, const char *shader_path) info.gpu = vk->context->gpu; info.memory_properties = &vk->context->memory_properties; info.pipeline_cache = vk->pipelines.cache; + info.queue = vk->context->queue; + info.command_pool = vk->swapchain[vk->context->current_swapchain_index].cmd_pool; info.max_input_size.width = vk->tex_w; info.max_input_size.height = vk->tex_h; info.swapchain.viewport = vk->vk_vp; @@ -1745,7 +1764,9 @@ static bool vulkan_frame(void *data, const void *frame, quad.texture = optimal; } - quad.sampler = vk->samplers.linear; + quad.sampler = optimal->mipmap ? 
vk->samplers.mipmap_linear : vk->samplers.linear; + quad.mvp = &vk->mvp_no_rot; quad.color.r = 1.0f; quad.color.g = 1.0f; @@ -2133,10 +2154,9 @@ static uintptr_t vulkan_load_texture(void *video_data, void *data, image->width, image->height, VK_FORMAT_B8G8R8A8_UNORM, image->pixels, NULL, VULKAN_TEXTURE_STATIC); - /* TODO: Actually add mipmapping support. - * Optimal tiling would make sense here as well ... */ texture->default_smooth = filter_type == TEXTURE_FILTER_MIPMAP_LINEAR || filter_type == TEXTURE_FILTER_LINEAR; + texture->mipmap = filter_type == TEXTURE_FILTER_MIPMAP_LINEAR; return (uintptr_t)texture; } @@ -2366,7 +2386,8 @@ static void vulkan_render_overlay(vk_t *vk) memset(&call, 0, sizeof(call)); call.pipeline = vk->display.pipelines[3]; /* Strip with blend */ call.texture = &vk->overlay.images[i]; - call.sampler = vk->samplers.linear; + call.sampler = call.texture->mipmap ? + vk->samplers.mipmap_linear : vk->samplers.linear; call.uniform = &vk->mvp; call.uniform_size = sizeof(vk->mvp); call.vbo = &range; diff --git a/gfx/drivers_font/vulkan_raster_font.c b/gfx/drivers_font/vulkan_raster_font.c index e562b867b7..26a82b3101 100644 --- a/gfx/drivers_font/vulkan_raster_font.c +++ b/gfx/drivers_font/vulkan_raster_font.c @@ -241,7 +241,7 @@ static void vulkan_raster_font_flush(vulkan_raster_t *font) const struct vk_draw_triangles call = { font->vk->pipelines.font, &font->texture, - font->vk->samplers.nearest, + font->vk->samplers.mipmap_linear, &font->vk->mvp, sizeof(font->vk->mvp), &font->range, diff --git a/gfx/drivers_shader/shader_vulkan.cpp b/gfx/drivers_shader/shader_vulkan.cpp index ce44047640..10d67acf3a 100644 --- a/gfx/drivers_shader/shader_vulkan.cpp +++ b/gfx/drivers_shader/shader_vulkan.cpp @@ -27,6 +27,7 @@ #include "../video_shader_driver.h" #include "../../verbosity.h" #include "../../msg_hash.h" +#include "../../libretro-common/include/formats/image.h" using namespace std; @@ -38,8 +39,20 @@ static const uint32_t opaque_frag[] = #include 
"../drivers/vulkan_shaders/opaque.frag.inc" ; -static void image_layout_transition( - VkCommandBuffer cmd, VkImage image, +static unsigned num_miplevels(unsigned width, unsigned height) +{ + unsigned size = std::max(width, height); + unsigned levels = 0; + while (size) + { + levels++; + size >>= 1; + } + return levels; +} + +static void image_layout_transition_levels( + VkCommandBuffer cmd, VkImage image, uint32_t levels, VkImageLayout old_layout, VkImageLayout new_layout, VkAccessFlags src_access, VkAccessFlags dst_access, VkPipelineStageFlags src_stages, VkPipelineStageFlags dst_stages) @@ -54,8 +67,8 @@ static void image_layout_transition( barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.image = image; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.layerCount = 1; + barrier.subresourceRange.levelCount = levels; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; vkCmdPipelineBarrier(cmd, src_stages, @@ -66,6 +79,18 @@ static void image_layout_transition( 1, &barrier); } +static void image_layout_transition( + VkCommandBuffer cmd, VkImage image, + VkImageLayout old_layout, VkImageLayout new_layout, + VkAccessFlags src_access, VkAccessFlags dst_access, + VkPipelineStageFlags src_stages, VkPipelineStageFlags dst_stages) +{ + image_layout_transition_levels(cmd, image, VK_REMAINING_MIP_LEVELS, + old_layout, new_layout, + src_access, dst_access, + src_stages, dst_stages); +} + static uint32_t find_memory_type( const VkPhysicalDeviceMemoryProperties &mem_props, uint32_t device_reqs, uint32_t host_reqs) @@ -134,6 +159,8 @@ struct Texture { vulkan_filter_chain_texture texture; vulkan_filter_chain_filter filter; + vulkan_filter_chain_filter mip_filter; + vulkan_filter_chain_address address; }; class DeferredDisposer @@ -164,11 +191,63 @@ class Buffer const VkBuffer &get_buffer() const { return buffer; } + Buffer(Buffer&&) = delete; + void 
operator=(Buffer&&) = delete; + private: VkDevice device; VkBuffer buffer; VkDeviceMemory memory; size_t size; + void *mapped = nullptr; +}; + +class StaticTexture +{ + public: + StaticTexture(string id, + VkDevice device, + VkImage image, + VkImageView view, + VkDeviceMemory memory, + unique_ptr buffer, + unsigned width, unsigned height, + bool linear, + bool mipmap, + vulkan_filter_chain_address address); + ~StaticTexture(); + + StaticTexture(StaticTexture&&) = delete; + void operator=(StaticTexture&&) = delete; + + void release_staging_buffer() + { + buffer.reset(); + } + + void set_id(string name) + { + id = move(name); + } + + const string &get_id() const + { + return id; + } + + const Texture &get_texture() const + { + return texture; + } + + private: + VkDevice device; + VkImage image; + VkImageView view; + VkDeviceMemory memory; + unique_ptr buffer; + string id; + Texture texture; }; class Framebuffer @@ -176,7 +255,7 @@ class Framebuffer public: Framebuffer(VkDevice device, const VkPhysicalDeviceMemoryProperties &mem_props, - const Size2D &max_size, VkFormat format); + const Size2D &max_size, VkFormat format, unsigned max_levels); ~Framebuffer(); Framebuffer(Framebuffer&&) = delete; @@ -193,13 +272,19 @@ class Framebuffer void clear(VkCommandBuffer cmd); void copy(VkCommandBuffer cmd, VkImage image, VkImageLayout layout); + unsigned get_levels() const { return levels; } + void generate_mips(VkCommandBuffer cmd); + private: VkDevice device = VK_NULL_HANDLE; const VkPhysicalDeviceMemoryProperties &memory_properties; VkImage image = VK_NULL_HANDLE; VkImageView view = VK_NULL_HANDLE; + VkImageView fb_view = VK_NULL_HANDLE; Size2D size; VkFormat format; + unsigned max_levels; + unsigned levels = 0; VkFramebuffer framebuffer = VK_NULL_HANDLE; VkRenderPass render_pass = VK_NULL_HANDLE; @@ -229,11 +314,12 @@ struct CommonResources size_t ubo_offset = 0; size_t ubo_alignment = 1; - VkSampler samplers[2]; + VkSampler 
samplers[VULKAN_FILTER_CHAIN_COUNT][VULKAN_FILTER_CHAIN_COUNT][VULKAN_FILTER_CHAIN_ADDRESS_COUNT]; vector original_history; vector framebuffer_feedback; vector pass_outputs; + vector> luts; unordered_map texture_semantic_map; unordered_map texture_semantic_uniform_map; @@ -321,6 +407,16 @@ class Pass return pass_info.source_filter; } + vulkan_filter_chain_filter get_mip_filter() const + { + return pass_info.mip_filter; + } + + vulkan_filter_chain_address get_address_mode() const + { + return pass_info.address; + } + void set_common_resources(CommonResources *common) { this->common = common; @@ -447,6 +543,9 @@ struct vulkan_filter_chain void set_frame_count_period(unsigned pass, unsigned period); void set_pass_name(unsigned pass, const char *name); + void add_static_texture(unique_ptr texture); + void release_staging_buffers(); + private: VkDevice device; VkPhysicalDevice gpu; @@ -572,6 +671,17 @@ void vulkan_filter_chain::set_input_texture( input_texture = texture; } +void vulkan_filter_chain::add_static_texture(unique_ptr texture) +{ + common.luts.push_back(move(texture)); +} + +void vulkan_filter_chain::release_staging_buffers() +{ + for (auto &lut : common.luts) + lut->release_staging_buffer(); +} + void vulkan_filter_chain::set_frame_count(uint64_t count) { for (auto &pass : passes) @@ -616,6 +726,8 @@ void vulkan_filter_chain::update_history_info() source.texture.width = texture->get_size().width; source.texture.height = texture->get_size().height; source.filter = passes.front()->get_source_filter(); + source.mip_filter = passes.front()->get_mip_filter(); + source.address = passes.front()->get_address_mode(); i++; } } @@ -638,6 +750,8 @@ void vulkan_filter_chain::update_feedback_info() source.texture.width = fb->get_size().width; source.texture.height = fb->get_size().height; source.filter = passes[i]->get_source_filter(); + source.mip_filter = passes[i]->get_mip_filter(); + source.address = passes[i]->get_address_mode(); } } @@ -668,7 +782,7 @@ bool 
vulkan_filter_chain::init_history() for (unsigned i = 0; i < required_images; i++) { original_history.emplace_back(new Framebuffer(device, memory_properties, - max_input_size, original_format)); + max_input_size, original_format, 1)); } RARCH_LOG("[Vulkan filter chain]: Using history of %u frames.\n", required_images); @@ -765,6 +879,19 @@ bool vulkan_filter_chain::init_alias() slang_texture_semantic_map{ SLANG_TEXTURE_SEMANTIC_PASS_FEEDBACK, i })) return false; } + + for (auto &lut : common.luts) + { + unsigned i = &lut - common.luts.data(); + if (!set_unique_map(common.texture_semantic_map, lut->get_id(), + slang_texture_semantic_map{ SLANG_TEXTURE_SEMANTIC_USER, i })) + return false; + + if (!set_unique_map(common.texture_semantic_uniform_map, lut->get_id() + "Size", + slang_texture_semantic_map{ SLANG_TEXTURE_SEMANTIC_USER, i })) + return false; + } + return true; } @@ -795,6 +922,7 @@ bool vulkan_filter_chain::init_ubo() VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)); } + common.ubo_mapped = static_cast(common.ubo->map()); return true; } @@ -855,14 +983,16 @@ void vulkan_filter_chain::build_offscreen_passes(VkCommandBuffer cmd, update_history_info(); update_feedback_info(); - common.ubo_mapped = static_cast(common.ubo->map()); - unsigned i; DeferredDisposer disposer(deferred_calls[current_sync_index]); const Texture original = { - input_texture, passes.front()->get_source_filter() }; - Texture source = { - input_texture, passes.front()->get_source_filter() }; + input_texture, + passes.front()->get_source_filter(), + passes.front()->get_mip_filter(), + passes.front()->get_address_mode(), + }; + + Texture source = original; for (i = 0; i < passes.size() - 1; i++) { @@ -875,6 +1005,8 @@ void vulkan_filter_chain::build_offscreen_passes(VkCommandBuffer cmd, source.texture.width = fb.get_size().width; source.texture.height = fb.get_size().height; source.filter = passes[i + 1]->get_source_filter(); + source.mip_filter = passes[i + 1]->get_mip_filter(); + source.address = 
passes[i + 1]->get_address_mode(); common.pass_outputs[i] = source; } @@ -942,10 +1074,21 @@ void vulkan_filter_chain::build_viewport_pass( Texture source; DeferredDisposer disposer(deferred_calls[current_sync_index]); const Texture original = { - input_texture, passes.front()->get_source_filter() }; + input_texture, + passes.front()->get_source_filter(), + passes.front()->get_mip_filter(), + passes.front()->get_address_mode(), + }; if (passes.size() == 1) - source = { input_texture, passes.back()->get_source_filter() }; + { + source = { + input_texture, + passes.back()->get_source_filter(), + passes.back()->get_mip_filter(), + passes.back()->get_address_mode(), + }; + } else { auto &fb = passes[passes.size() - 2]->get_framebuffer(); @@ -954,14 +1097,13 @@ void vulkan_filter_chain::build_viewport_pass( source.texture.width = fb.get_size().width; source.texture.height = fb.get_size().height; source.filter = passes.back()->get_source_filter(); + source.mip_filter = passes.back()->get_mip_filter(); + source.address = passes.back()->get_address_mode(); } passes.back()->build_commands(disposer, cmd, original, source, vp, mvp); - common.ubo->unmap(); - common.ubo_mapped = nullptr; - // If we need to keep old frames, copy it after fragment is complete. // TODO: We can improve pipelining by figuring out which pass is the last that reads from // the history and dispatch the copy earlier. @@ -973,6 +1115,44 @@ void vulkan_filter_chain::build_viewport_pass( pass->end_frame(); } +StaticTexture::StaticTexture(string id, + VkDevice device, + VkImage image, + VkImageView view, + VkDeviceMemory memory, + unique_ptr buffer, + unsigned width, unsigned height, + bool linear, + bool mipmap, + vulkan_filter_chain_address address) + : id(move(id)), + device(device), + image(image), + view(view), + memory(memory), + buffer(move(buffer)) +{ + texture.filter = linear ? VULKAN_FILTER_CHAIN_LINEAR : VULKAN_FILTER_CHAIN_NEAREST; + texture.mip_filter = + mipmap && linear ? 
VULKAN_FILTER_CHAIN_LINEAR : VULKAN_FILTER_CHAIN_NEAREST; + texture.address = address; + texture.texture.image = image; + texture.texture.view = view; + texture.texture.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + texture.texture.width = width; + texture.texture.height = height; +} + +StaticTexture::~StaticTexture() +{ + if (view != VK_NULL_HANDLE) + vkDestroyImageView(device, view, nullptr); + if (image != VK_NULL_HANDLE) + vkDestroyImage(device, image, nullptr); + if (memory != VK_NULL_HANDLE) + vkFreeMemory(device, memory, nullptr); +} + Buffer::Buffer(VkDevice device, const VkPhysicalDeviceMemoryProperties &mem_props, size_t size, VkBufferUsageFlags usage) : @@ -1001,19 +1181,27 @@ Buffer::Buffer(VkDevice device, void *Buffer::map() { - void *ptr = nullptr; - if (vkMapMemory(device, memory, 0, size, 0, &ptr) == VK_SUCCESS) - return ptr; - return nullptr; + if (!mapped) + { + if (vkMapMemory(device, memory, 0, size, 0, &mapped) == VK_SUCCESS) + return mapped; + else + return nullptr; + } + return mapped; } void Buffer::unmap() { - vkUnmapMemory(device, memory); + if (mapped) + vkUnmapMemory(device, memory); + mapped = nullptr; } Buffer::~Buffer() { + if (mapped) + unmap(); if (memory != VK_NULL_HANDLE) vkFreeMemory(device, memory, nullptr); if (buffer != VK_NULL_HANDLE) @@ -1370,35 +1558,94 @@ CommonResources::CommonResources(VkDevice device, vbo->unmap(); VkSamplerCreateInfo info = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; - info.magFilter = VK_FILTER_NEAREST; - info.minFilter = VK_FILTER_NEAREST; - info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; info.mipLodBias = 0.0f; info.maxAnisotropy = 1.0f; info.compareEnable = false; info.minLod = 0.0f; - info.maxLod = 0.0f; + info.maxLod = VK_LOD_CLAMP_NONE; info.unnormalizedCoordinates = false; - info.borderColor = 
VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - vkCreateSampler(device, - &info, nullptr, &samplers[VULKAN_FILTER_CHAIN_NEAREST]); + for (unsigned i = 0; i < VULKAN_FILTER_CHAIN_COUNT; i++) + { + switch (static_cast(i)) + { + case VULKAN_FILTER_CHAIN_LINEAR: + info.magFilter = VK_FILTER_LINEAR; + info.minFilter = VK_FILTER_LINEAR; + break; - info.magFilter = VK_FILTER_LINEAR; - info.minFilter = VK_FILTER_LINEAR; + case VULKAN_FILTER_CHAIN_NEAREST: + info.magFilter = VK_FILTER_NEAREST; + info.minFilter = VK_FILTER_NEAREST; + break; - vkCreateSampler(device, - &info, nullptr, &samplers[VULKAN_FILTER_CHAIN_LINEAR]); + default: + break; + } + + for (unsigned j = 0; j < VULKAN_FILTER_CHAIN_COUNT; j++) + { + switch (static_cast(j)) + { + case VULKAN_FILTER_CHAIN_LINEAR: + info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + break; + + case VULKAN_FILTER_CHAIN_NEAREST: + info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + break; + + default: + break; + } + + for (unsigned k = 0; k < VULKAN_FILTER_CHAIN_ADDRESS_COUNT; k++) + { + VkSamplerAddressMode mode = VK_SAMPLER_ADDRESS_MODE_MAX_ENUM; + + switch (static_cast(k)) + { + case VULKAN_FILTER_CHAIN_ADDRESS_REPEAT: + mode = VK_SAMPLER_ADDRESS_MODE_REPEAT; + break; + + case VULKAN_FILTER_CHAIN_ADDRESS_MIRRORED_REPEAT: + mode = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + break; + + case VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_EDGE: + mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + break; + + case VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_BORDER: + mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; + break; + + case VULKAN_FILTER_CHAIN_ADDRESS_MIRROR_CLAMP_TO_EDGE: + mode = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + break; + + default: + break; + } + + info.addressModeU = mode; + info.addressModeV = mode; + info.addressModeW = mode; + vkCreateSampler(device, &info, nullptr, &samplers[i][j][k]); + } + } + } } CommonResources::~CommonResources() { - for (auto &samp : samplers) - if (samp 
!= VK_NULL_HANDLE) - vkDestroySampler(device, samp, nullptr); + for (auto &i : samplers) + for (auto &j : i) + for (auto &k : j) + if (k != VK_NULL_HANDLE) + vkDestroySampler(device, k, nullptr); } void Pass::allocate_buffers() @@ -1429,7 +1676,7 @@ bool Pass::init_feedback() framebuffer_feedback = unique_ptr( new Framebuffer(device, memory_properties, current_framebuffer_size, - pass_info.rt_format)); + pass_info.rt_format, pass_info.max_levels)); return true; } @@ -1443,7 +1690,7 @@ bool Pass::build() framebuffer = unique_ptr( new Framebuffer(device, memory_properties, current_framebuffer_size, - pass_info.rt_format)); + pass_info.rt_format, pass_info.max_levels)); } reflection = slang_reflection{}; @@ -1485,7 +1732,7 @@ void Pass::set_texture(VkDescriptorSet set, unsigned binding, const Texture &texture) { VkDescriptorImageInfo image_info; - image_info.sampler = common->samplers[texture.filter]; + image_info.sampler = common->samplers[texture.filter][texture.mip_filter][texture.address]; image_info.imageView = texture.texture.view; image_info.imageLayout = texture.texture.layout; @@ -1595,6 +1842,7 @@ void Pass::build_semantics(VkDescriptorSet set, uint8_t *buffer, current_framebuffer_size.width, current_framebuffer_size.height); build_semantic_vec4(buffer, SLANG_SEMANTIC_FINAL_VIEWPORT, unsigned(current_viewport.width), unsigned(current_viewport.height)); + build_semantic_uint(buffer, SLANG_SEMANTIC_FRAME_COUNT, frame_count_period ? uint32_t(frame_count % frame_count_period) : uint32_t(frame_count)); @@ -1634,6 +1882,16 @@ void Pass::build_semantics(VkDescriptorSet set, uint8_t *buffer, texture); i++; } + + // LUTs. 
+ i = 0; + for (auto &lut : common->luts) + { + build_semantic_texture_array(set, buffer, + SLANG_TEXTURE_SEMANTIC_USER, i, + lut->get_texture()); + i++; + } } void Pass::build_commands( @@ -1670,7 +1928,8 @@ void Pass::build_commands( { set_uniform_buffer(sets[sync_index], reflection.ubo_binding, common->ubo->get_buffer(), - ubo_offset, reflection.ubo_size); + ubo_offset + sync_index * common->ubo_sync_index_stride, + reflection.ubo_size); } // The final pass is always executed inside @@ -1680,8 +1939,8 @@ void Pass::build_commands( if (!final_pass) { // Render. - image_layout_transition(cmd, - framebuffer->get_image(), + image_layout_transition_levels(cmd, + framebuffer->get_image(), 1, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 0, @@ -1756,30 +2015,37 @@ void Pass::build_commands( { vkCmdEndRenderPass(cmd); - // Barrier to sync with next pass. - image_layout_transition( - cmd, - framebuffer->get_image(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + if (framebuffer->get_levels() > 1) + framebuffer->generate_mips(cmd); + else + { + // Barrier to sync with next pass. 
+ image_layout_transition( + cmd, + framebuffer->get_image(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } } } Framebuffer::Framebuffer( VkDevice device, const VkPhysicalDeviceMemoryProperties &mem_props, - const Size2D &max_size, VkFormat format) : + const Size2D &max_size, VkFormat format, + unsigned max_levels) : device(device), memory_properties(mem_props), size(max_size), - format(format) + format(format), + max_levels(max(max_levels, 1u)) { - RARCH_LOG("[Vulkan filter chain]: Creating framebuffer %u x %u.\n", - max_size.width, max_size.height); + RARCH_LOG("[Vulkan filter chain]: Creating framebuffer %u x %u (max %u level(s)).\n", + max_size.width, max_size.height, max_levels); init_render_pass(); init(nullptr); } @@ -1811,6 +2077,132 @@ void Framebuffer::clear(VkCommandBuffer cmd) VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); } +void Framebuffer::generate_mips(VkCommandBuffer cmd) +{ + // This is run every frame, so make sure + // we aren't opting into the "lazy" way of doing this. :) + VkImageMemoryBarrier barriers[2] = { + { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }, + { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER }, + }; + + // First, transfer the input mip level to TRANSFER_SRC_OPTIMAL. + // This should allow the surface to stay compressed. + // All subsequent mip-layers are now transferred into DST_OPTIMAL from + // UNDEFINED at this point. 
+ + // Input + barriers[0].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barriers[0].dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barriers[0].oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barriers[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = image; + barriers[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barriers[0].subresourceRange.baseMipLevel = 0; + barriers[0].subresourceRange.levelCount = 1; + barriers[0].subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + // The rest of the mip chain + barriers[1].srcAccessMask = 0; + barriers[1].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barriers[1].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barriers[1].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = image; + barriers[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barriers[1].subresourceRange.baseMipLevel = 1; + barriers[1].subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barriers[1].subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + vkCmdPipelineBarrier(cmd, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + false, + 0, nullptr, + 0, nullptr, + 2, barriers); + + for (unsigned i = 1; i < levels; i++) + { + // For subsequent passes, we have to transition from DST_OPTIMAL to SRC_OPTIMAL, + // but only do so one mip-level at a time. 
+ if (i > 1) + { + barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barriers[0].dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barriers[0].subresourceRange.baseMipLevel = i - 1; + barriers[0].subresourceRange.levelCount = 1; + barriers[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barriers[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + vkCmdPipelineBarrier(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + false, + 0, nullptr, + 0, nullptr, + 1, barriers); + } + + VkImageBlit blit_region = {}; + unsigned src_width = std::max(size.width >> (i - 1), 1u); + unsigned src_height = std::max(size.height >> (i - 1), 1u); + unsigned target_width = std::max(size.width >> i, 1u); + unsigned target_height = std::max(size.height >> i, 1u); + + blit_region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blit_region.srcSubresource.mipLevel = i - 1; + blit_region.srcSubresource.baseArrayLayer = 0; + blit_region.srcSubresource.layerCount = 1; + blit_region.dstSubresource = blit_region.srcSubresource; + blit_region.dstSubresource.mipLevel = i; + blit_region.srcOffsets[1].x = src_width; + blit_region.srcOffsets[1].y = src_height; + blit_region.srcOffsets[1].z = 1; + blit_region.dstOffsets[1].x = target_width; + blit_region.dstOffsets[1].y = target_height; + blit_region.dstOffsets[1].z = 1; + + vkCmdBlitImage(cmd, + image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &blit_region, VK_FILTER_LINEAR); + } + + // We are now done, and we have all mip-levels except the last in TRANSFER_SRC_OPTIMAL, + // and the last one still on TRANSFER_DST_OPTIMAL, so do a final barrier which + // moves everything to SHADER_READ_ONLY_OPTIMAL in one go along with the execution barrier to next pass. + // Read-to-read memory barrier, so only need execution barrier for first transition. 
+ barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barriers[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barriers[0].subresourceRange.baseMipLevel = 0; + barriers[0].subresourceRange.levelCount = levels - 1; + barriers[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barriers[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + // This is read-after-write barrier. + barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barriers[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barriers[1].subresourceRange.baseMipLevel = levels - 1; + barriers[1].subresourceRange.levelCount = 1; + barriers[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barriers[1].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + vkCmdPipelineBarrier(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + false, + 0, nullptr, + 0, nullptr, + 2, barriers); + + // Next pass will wait for ALL_GRAPHICS_BIT, and since we have dstStage as FRAGMENT_SHADER, + // the dependency chain will ensure we don't start next pass until the mipchain is complete. 
+} + void Framebuffer::copy(VkCommandBuffer cmd, VkImage src_image, VkImageLayout src_layout) { @@ -1850,15 +2242,18 @@ void Framebuffer::init(DeferredDisposer *disposer) info.extent.width = size.width; info.extent.height = size.height; info.extent.depth = 1; - info.mipLevels = 1; + info.mipLevels = min(max_levels, num_miplevels(size.width, size.height)); info.arrayLayers = 1; info.samples = VK_SAMPLE_COUNT_1_BIT; info.tiling = VK_IMAGE_TILING_OPTIMAL; info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT; + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + levels = info.mipLevels; vkCreateImage(device, &info, nullptr, &image); @@ -1897,7 +2292,7 @@ void Framebuffer::init(DeferredDisposer *disposer) view_info.image = image; view_info.subresourceRange.baseMipLevel = 0; view_info.subresourceRange.baseArrayLayer = 0; - view_info.subresourceRange.levelCount = 1; + view_info.subresourceRange.levelCount = levels; view_info.subresourceRange.layerCount = 1; view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; view_info.components.r = VK_COMPONENT_SWIZZLE_R; @@ -1906,6 +2301,8 @@ void Framebuffer::init(DeferredDisposer *disposer) view_info.components.a = VK_COMPONENT_SWIZZLE_A; vkCreateImageView(device, &view_info, nullptr, &view); + view_info.subresourceRange.levelCount = 1; + vkCreateImageView(device, &view_info, nullptr, &fb_view); init_framebuffer(); } @@ -1949,7 +2346,7 @@ void Framebuffer::init_framebuffer() VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO }; info.renderPass = render_pass; info.attachmentCount = 1; - info.pAttachments = &view; + info.pAttachments = &fb_view; info.width = size.width; info.height = size.height; info.layers = 1; @@ -1972,16 +2369,19 @@ void Framebuffer::set_size(DeferredDisposer &disposer, const Size2D &size) // // Fake lambda init captures for C++11. 
// - auto d = device; - auto i = image; - auto v = view; - auto fb = framebuffer; + auto d = device; + auto i = image; + auto v = view; + auto fbv = fb_view; + auto fb = framebuffer; disposer.defer([=] { if (fb != VK_NULL_HANDLE) vkDestroyFramebuffer(d, fb, nullptr); if (v != VK_NULL_HANDLE) vkDestroyImageView(d, v, nullptr); + if (fbv != VK_NULL_HANDLE) + vkDestroyImageView(d, fbv, nullptr); if (i != VK_NULL_HANDLE) vkDestroyImage(d, i, nullptr); }); @@ -1998,6 +2398,8 @@ Framebuffer::~Framebuffer() vkDestroyRenderPass(device, render_pass, nullptr); if (view != VK_NULL_HANDLE) vkDestroyImageView(device, view, nullptr); + if (fb_view != VK_NULL_HANDLE) + vkDestroyImageView(device, fb_view, nullptr); if (image != VK_NULL_HANDLE) vkDestroyImage(device, image, nullptr); if (memory.memory != VK_NULL_HANDLE) @@ -2030,6 +2432,8 @@ vulkan_filter_chain_t *vulkan_filter_chain_create_default( pass_info.scale_y = 1.0f; pass_info.rt_format = tmpinfo.swapchain.format; pass_info.source_filter = filter; + pass_info.mip_filter = VULKAN_FILTER_CHAIN_NEAREST; + pass_info.address = VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_EDGE; chain->set_pass_info(0, pass_info); chain->set_shader(0, VK_SHADER_STAGE_VERTEX_BIT, @@ -2099,6 +2503,223 @@ static VkFormat glslang_format_to_vk(glslang_format fmt) } } +static vulkan_filter_chain_address wrap_to_address(gfx_wrap_type type) +{ + switch (type) + { + default: + case RARCH_WRAP_EDGE: + return VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_EDGE; + + case RARCH_WRAP_BORDER: + return VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_BORDER; + + case RARCH_WRAP_REPEAT: + return VULKAN_FILTER_CHAIN_ADDRESS_REPEAT; + + case RARCH_WRAP_MIRRORED_REPEAT: + return VULKAN_FILTER_CHAIN_ADDRESS_MIRRORED_REPEAT; + } +} + +static unique_ptr<StaticTexture> vulkan_filter_chain_load_lut(VkCommandBuffer cmd, + const struct vulkan_filter_chain_create_info *info, + vulkan_filter_chain *chain, + const video_shader_lut *shader) +{ + texture_image image; + VkMemoryRequirements mem_reqs; + VkImageCreateInfo 
image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; + VkImageViewCreateInfo view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; + VkMemoryAllocateInfo alloc = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + VkImage tex = VK_NULL_HANDLE; + VkDeviceMemory memory = VK_NULL_HANDLE; + VkImageView view = VK_NULL_HANDLE; + VkBufferImageCopy region = {}; + void *ptr = nullptr; + unique_ptr<Buffer> buffer; + + if (!image_texture_load(&image, shader->path)) + return {}; + + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_B8G8R8A8_UNORM; + image_info.extent.width = image.width; + image_info.extent.height = image.height; + image_info.extent.depth = 1; + image_info.mipLevels = shader->mipmap ? num_miplevels(image.width, image.height) : 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + vkCreateImage(info->device, &image_info, nullptr, &tex); + vkGetImageMemoryRequirements(info->device, tex, &mem_reqs); + alloc.allocationSize = mem_reqs.size; + alloc.memoryTypeIndex = find_memory_type( + *info->memory_properties, + mem_reqs.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + if (vkAllocateMemory(info->device, &alloc, nullptr, &memory) != VK_SUCCESS) + goto error; + + vkBindImageMemory(info->device, tex, memory, 0); + + view_info.image = tex; + view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_info.format = VK_FORMAT_B8G8R8A8_UNORM; + view_info.components.r = VK_COMPONENT_SWIZZLE_R; + view_info.components.g = VK_COMPONENT_SWIZZLE_G; + view_info.components.b = VK_COMPONENT_SWIZZLE_B; + view_info.components.a = VK_COMPONENT_SWIZZLE_A; + view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + view_info.subresourceRange.levelCount = image_info.mipLevels; + 
view_info.subresourceRange.layerCount = 1; + vkCreateImageView(info->device, &view_info, nullptr, &view); + + buffer = unique_ptr<Buffer>(new Buffer(info->device, *info->memory_properties, + image.width * image.height * sizeof(uint32_t), VK_BUFFER_USAGE_TRANSFER_SRC_BIT)); + ptr = buffer->map(); + memcpy(ptr, image.pixels, image.width * image.height * sizeof(uint32_t)); + buffer->unmap(); + + image_layout_transition(cmd, tex, + VK_IMAGE_LAYOUT_UNDEFINED, + shader->mipmap ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 0, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageExtent.width = image.width; + region.imageExtent.height = image.height; + region.imageExtent.depth = 1; + + vkCmdCopyBufferToImage(cmd, buffer->get_buffer(), tex, + shader->mipmap ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &region); + + for (unsigned i = 1; i < image_info.mipLevels; i++) + { + VkImageBlit blit_region = {}; + unsigned src_width = std::max(image.width >> (i - 1), 1u); + unsigned src_height = std::max(image.height >> (i - 1), 1u); + unsigned target_width = std::max(image.width >> i, 1u); + unsigned target_height = std::max(image.height >> i, 1u); + + blit_region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + blit_region.srcSubresource.mipLevel = i - 1; + blit_region.srcSubresource.baseArrayLayer = 0; + blit_region.srcSubresource.layerCount = 1; + blit_region.dstSubresource = blit_region.srcSubresource; + blit_region.dstSubresource.mipLevel = i; + blit_region.srcOffsets[1].x = src_width; + blit_region.srcOffsets[1].y = src_height; + blit_region.srcOffsets[1].z = 1; + blit_region.dstOffsets[1].x = target_width; + blit_region.dstOffsets[1].y = target_height; + blit_region.dstOffsets[1].z = 1; + + // Only injects execution and memory barriers, + // not actual transition. + image_layout_transition(cmd, tex, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + vkCmdBlitImage(cmd, + tex, VK_IMAGE_LAYOUT_GENERAL, + tex, VK_IMAGE_LAYOUT_GENERAL, + 1, &blit_region, VK_FILTER_LINEAR); + } + + image_layout_transition(cmd, tex, + shader->mipmap ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + + image_texture_free(&image); + image.pixels = nullptr; + + return unique_ptr<StaticTexture>(new StaticTexture(shader->id, info->device, + tex, view, memory, move(buffer), image.width, image.height, + shader->filter != RARCH_FILTER_NEAREST, + image_info.mipLevels > 1, + wrap_to_address(shader->wrap))); + +error: + if (image.pixels) + image_texture_free(&image); + if (tex != VK_NULL_HANDLE) + vkDestroyImage(info->device, tex, nullptr); + if (view != VK_NULL_HANDLE) + vkDestroyImageView(info->device, view, nullptr); + if (memory != VK_NULL_HANDLE) + vkFreeMemory(info->device, memory, nullptr); + return {}; +} + +static bool vulkan_filter_chain_load_luts( + const struct vulkan_filter_chain_create_info *info, + vulkan_filter_chain *chain, + video_shader *shader) +{ + VkCommandBufferBeginInfo begin_info = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + VkSubmitInfo submit_info = { + VK_STRUCTURE_TYPE_SUBMIT_INFO }; + VkCommandBuffer cmd = VK_NULL_HANDLE; + VkCommandBufferAllocateInfo cmd_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; + bool recording = false; + + cmd_info.commandPool = info->command_pool; + cmd_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmd_info.commandBufferCount = 1; + + vkAllocateCommandBuffers(info->device, &cmd_info, &cmd); + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + vkBeginCommandBuffer(cmd, &begin_info); + recording = true; + + for (unsigned i = 0; i < shader->luts; i++) + { + auto image = vulkan_filter_chain_load_lut(cmd, info, chain, &shader->lut[i]); + if (!image) + { + RARCH_ERR("[Vulkan]: Failed to load LUT \"%s\".\n", shader->lut[i].path); + goto error; + } + + chain->add_static_texture(move(image)); + } + + vkEndCommandBuffer(cmd); + recording = false; + 
submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &cmd; + vkQueueSubmit(info->queue, 1, &submit_info, VK_NULL_HANDLE); + vkQueueWaitIdle(info->queue); + vkFreeCommandBuffers(info->device, info->command_pool, 1, &cmd); + chain->release_staging_buffers(); + return true; + +error: + if (recording) + vkEndCommandBuffer(cmd); + if (cmd != VK_NULL_HANDLE) + vkFreeCommandBuffers(info->device, info->command_pool, 1, &cmd); + return false; +} + vulkan_filter_chain_t *vulkan_filter_chain_create_from_preset( const struct vulkan_filter_chain_create_info *info, const char *path, vulkan_filter_chain_filter filter) @@ -2125,9 +2746,14 @@ vulkan_filter_chain_t *vulkan_filter_chain_create_from_preset( if (!chain) return nullptr; + if (shader->luts && !vulkan_filter_chain_load_luts(info, chain.get(), shader.get())) + return nullptr; + for (unsigned i = 0; i < shader->passes; i++) { const video_shader_pass *pass = &shader->pass[i]; + const video_shader_pass *next_pass = + i + 1 < shader->passes ? &shader->pass[i + 1] : nullptr; struct vulkan_filter_chain_pass_info pass_info; memset(&pass_info, 0, sizeof(pass_info)); @@ -2166,6 +2792,18 @@ vulkan_filter_chain_t *vulkan_filter_chain_create_from_preset( pass->filter == RARCH_FILTER_LINEAR ? VULKAN_FILTER_CHAIN_LINEAR : VULKAN_FILTER_CHAIN_NEAREST; } + pass_info.address = wrap_to_address(pass->wrap); + + // TODO: Expose max_levels in slangp. + // CGP format is a bit awkward in that it uses mipmap_input, + // so we must check if next pass needs the mipmapping. + if (next_pass && next_pass->mipmap) + pass_info.max_levels = ~0u; + else + pass_info.max_levels = 1; + + pass_info.mip_filter = pass->filter != RARCH_FILTER_NEAREST && pass_info.max_levels > 1 + ? 
VULKAN_FILTER_CHAIN_LINEAR : VULKAN_FILTER_CHAIN_NEAREST; bool explicit_format = output.meta.rt_format != SLANG_FORMAT_UNKNOWN; @@ -2261,7 +2899,11 @@ vulkan_filter_chain_t *vulkan_filter_chain_create_from_preset( pass_info.scale_x = 1.0f; pass_info.scale_y = 1.0f; pass_info.rt_format = tmpinfo.swapchain.format; + pass_info.source_filter = filter; + pass_info.mip_filter = VULKAN_FILTER_CHAIN_NEAREST; + pass_info.address = VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_EDGE; + chain->set_pass_info(shader->passes, pass_info); chain->set_shader(shader->passes, diff --git a/gfx/drivers_shader/shader_vulkan.h b/gfx/drivers_shader/shader_vulkan.h index 612d122349..4ebe7b27cd 100644 --- a/gfx/drivers_shader/shader_vulkan.h +++ b/gfx/drivers_shader/shader_vulkan.h @@ -31,7 +31,18 @@ typedef struct vulkan_filter_chain vulkan_filter_chain_t; enum vulkan_filter_chain_filter { VULKAN_FILTER_CHAIN_LINEAR = 0, - VULKAN_FILTER_CHAIN_NEAREST = 1 + VULKAN_FILTER_CHAIN_NEAREST = 1, + VULKAN_FILTER_CHAIN_COUNT +}; + +enum vulkan_filter_chain_address +{ + VULKAN_FILTER_CHAIN_ADDRESS_REPEAT = 0, + VULKAN_FILTER_CHAIN_ADDRESS_MIRRORED_REPEAT = 1, + VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_EDGE = 2, + VULKAN_FILTER_CHAIN_ADDRESS_CLAMP_TO_BORDER = 3, + VULKAN_FILTER_CHAIN_ADDRESS_MIRROR_CLAMP_TO_EDGE = 4, + VULKAN_FILTER_CHAIN_ADDRESS_COUNT }; struct vulkan_filter_chain_texture @@ -65,6 +76,11 @@ struct vulkan_filter_chain_pass_info /* The filter to use for source in this pass. */ enum vulkan_filter_chain_filter source_filter; + enum vulkan_filter_chain_filter mip_filter; + enum vulkan_filter_chain_address address; + + /* Maximum number of mip-levels to use. 
+ */ + unsigned max_levels; }; struct vulkan_filter_chain_swapchain_info @@ -81,6 +97,8 @@ struct vulkan_filter_chain_create_info VkPhysicalDevice gpu; const VkPhysicalDeviceMemoryProperties *memory_properties; VkPipelineCache pipeline_cache; + VkQueue queue; + VkCommandPool command_pool; unsigned num_passes; VkFormat original_format; diff --git a/gfx/drivers_shader/slang_reflection.cpp b/gfx/drivers_shader/slang_reflection.cpp index c9b9d11c4a..6567ba7ba5 100644 --- a/gfx/drivers_shader/slang_reflection.cpp +++ b/gfx/drivers_shader/slang_reflection.cpp @@ -29,6 +29,7 @@ static bool slang_texture_semantic_is_array(slang_texture_semantic sem) case SLANG_TEXTURE_SEMANTIC_ORIGINAL_HISTORY: case SLANG_TEXTURE_SEMANTIC_PASS_OUTPUT: case SLANG_TEXTURE_SEMANTIC_PASS_FEEDBACK: + case SLANG_TEXTURE_SEMANTIC_USER: return true; default: @@ -52,6 +53,7 @@ static const char *texture_semantic_names[] = { "OriginalHistory", "PassOutput", "PassFeedback", + "User", nullptr }; @@ -61,6 +63,7 @@ static const char *texture_semantic_uniform_names[] = { "OriginalHistorySize", "PassOutputSize", "PassFeedbackSize", + "UserSize", nullptr }; diff --git a/gfx/drivers_shader/slang_reflection.hpp b/gfx/drivers_shader/slang_reflection.hpp index 9394b8a3b4..cfb7cb0801 100644 --- a/gfx/drivers_shader/slang_reflection.hpp +++ b/gfx/drivers_shader/slang_reflection.hpp @@ -48,6 +48,11 @@ enum slang_texture_semantic // Canonical name: "PassFeedback#", e.g. "PassFeedback2". SLANG_TEXTURE_SEMANTIC_PASS_FEEDBACK = 4, + // Inputs from static textures, defined by the user. + // There is no canonical name, and the only way to use these semantics is by + // remapping. 
+ SLANG_TEXTURE_SEMANTIC_USER = 5, + SLANG_NUM_TEXTURE_SEMANTICS, SLANG_INVALID_TEXTURE_SEMANTIC = -1 }; diff --git a/menu/drivers_display/menu_display_vulkan.c b/menu/drivers_display/menu_display_vulkan.c index 8fd817d7e1..04dfc8e5c4 100644 --- a/menu/drivers_display/menu_display_vulkan.c +++ b/menu/drivers_display/menu_display_vulkan.c @@ -193,8 +193,9 @@ static void menu_display_vk_draw(void *data) vk->display.pipelines[ to_display_pipeline(draw->prim_type, vk->display.blend)], texture, - texture->default_smooth - ? vk->samplers.linear : vk->samplers.nearest, + texture->mipmap ? + vk->samplers.mipmap_linear : + (texture->default_smooth ? vk->samplers.linear : vk->samplers.nearest), draw->matrix_data ? draw->matrix_data : menu_display_vk_get_default_mvp(), sizeof(math_matrix_4x4),