From e7002b6f96927eebfb2fc4d4f7ad00274ff64d39 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 2 Dec 2021 16:12:23 +0000 Subject: [PATCH] radv: use wave32 for raytracing Beginning of Quake II RTX, 50% resolution scale, RX 6800: 48 -> 54 FPS. Signed-off-by: Rhys Perry Reviewed-by: Bas Nieuwenhuizen Part-of: --- docs/envvars.rst | 2 ++ src/amd/vulkan/radv_debug.h | 1 + src/amd/vulkan/radv_device.c | 5 +++++ src/amd/vulkan/radv_pipeline.c | 2 ++ src/amd/vulkan/radv_pipeline_rt.c | 7 +++++++ src/amd/vulkan/radv_private.h | 2 ++ 6 files changed, 19 insertions(+) diff --git a/docs/envvars.rst b/docs/envvars.rst index aa133a44c41..bc4a9786523 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -698,6 +698,8 @@ RADV driver environment variables enable rt extensions whose implementation is still experimental. ``sam`` enable optimizations to move more driver internal objects to VRAM. + ``rtwave64`` + enable wave64 for ray tracing shaders (GFX10+) :envvar:`RADV_TEX_ANISO` force anisotropy filter (up to 16) diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index ef7dbe6df23..754d5c82a11 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -81,6 +81,7 @@ enum { RADV_PERFTEST_NGGC = 1u << 9, RADV_PERFTEST_FORCE_EMULATE_RT = 1u << 10, RADV_PERFTEST_NV_MS = 1u << 11, + RADV_PERFTEST_RT_WAVE_64 = 1u << 12, }; bool radv_init_trace(struct radv_device *device); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index c78f675bfed..56fe2b18aeb 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -724,6 +724,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm device->cs_wave_size = 64; device->ps_wave_size = 64; device->ge_wave_size = 64; + device->rt_wave_size = 64; if (device->rad_info.chip_class >= GFX10) { if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32) @@ -735,6 +736,9 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32) device->ge_wave_size = 32; + + if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64)) + device->rt_wave_size = 32; } radv_physical_device_init_mem_types(device); @@ -879,6 +883,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P {"nggc", RADV_PERFTEST_NGGC}, {"force_emulate_rt", RADV_PERFTEST_FORCE_EMULATE_RT}, {"nv_ms", RADV_PERFTEST_NV_MS}, + {"rtwave64", RADV_PERFTEST_RT_WAVE_64}, {NULL, 0}}; const char * diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 17332ed42ab..bca14213517 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -276,6 +276,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING; if (device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT) hash_flags |= RADV_HASH_SHADER_FORCE_EMULATE_RT; + if (device->physical_device->rt_wave_size == 64) + hash_flags |= RADV_HASH_SHADER_RT_WAVE64; if (device->physical_device->cs_wave_size == 32) hash_flags |= RADV_HASH_SHADER_CS_WAVE32; if (device->physical_device->ps_wave_size == 32) diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 27eeebaaccb..64dbf456ff3 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -2200,6 +2200,12 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, radv_hash_rt_shaders(hash, &local_create_info, radv_get_hash_flags(device, keep_statistic_info)); struct vk_shader_module module = {.base.type = VK_OBJECT_TYPE_SHADER_MODULE}; + VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = NULL, + .requiredSubgroupSize = device->physical_device->rt_wave_size, + }; + VkComputePipelineCreateInfo compute_info = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = NULL, @@ -2207,6 +2213,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = &subgroup_size, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = vk_shader_module_to_handle(&module), .pName = "main", diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index e4bda1027c8..4daa8a20d8b 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -287,6 +287,7 @@ struct radv_physical_device { uint8_t ps_wave_size; uint8_t cs_wave_size; uint8_t ge_wave_size; + uint8_t rt_wave_size; /* Whether to use the LLVM compiler backend */ bool use_llvm; @@ -1712,6 +1713,7 @@ struct radv_event { #define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15) #define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16) #define RADV_HASH_SHADER_SPLIT_FMA (1 << 17) +#define RADV_HASH_SHADER_RT_WAVE64 (1 << 18) struct radv_pipeline_key;