radv: implement VK_EXT_shader_image_atomic_int64

The extension is only exposed when using ACO or LLVM 11+, because older LLVM versions have a bug where compilation fails when the result of an atomic is used.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7234>
This commit is contained in:
Rhys Perry 2020-10-19 18:02:35 +01:00 committed by Marge Bot
parent 9f43268772
commit 86ef139bf4
6 changed files with 68 additions and 22 deletions

View File

@ -16,3 +16,4 @@ driconf: add indirect_gl_extension_override
VK_AMD_mixed_attachment_samples on RADV (GFX6-GFX7).
GL_MESA_pack_invert on r100 and vieux
GL_ANGLE_pack_reverse_row_order
VK_EXT_shader_image_atomic_int64 on RADV

View File

@ -1454,6 +1454,13 @@ void radv_GetPhysicalDeviceFeatures2(
features->shaderTerminateInvocation = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
(VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
features->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
features->sparseImageInt64Atomics = false;
break;
}
default:
break;
}

View File

@ -157,6 +157,8 @@ EXTENSIONS = [
Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= GFX7'),
Extension('VK_EXT_shader_atomic_float', 1, True),
Extension('VK_EXT_shader_demote_to_helper_invocation',1, 'LLVM_VERSION_MAJOR >= 9 || !device->use_llvm'),
# LLVM versions before 11 have a bug where compilation fails when the result of an atomic is used
Extension('VK_EXT_shader_image_atomic_int64', 1, 'LLVM_VERSION_MAJOR >= 11 || !device->use_llvm'),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_shader_stencil_export', 1, True),
Extension('VK_EXT_shader_subgroup_ballot', 1, True),

View File

@ -105,6 +105,9 @@ uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *de
return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
}
break;
case 64:
if (desc->nr_channels == 1)
return V_008F0C_BUF_DATA_FORMAT_32_32;
}
return V_008F0C_BUF_DATA_FORMAT_INVALID;
@ -367,6 +370,11 @@ uint32_t radv_translate_tex_dataformat(VkFormat format,
case 4:
return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
}
break;
case 64:
if (desc->nr_channels == 1)
return V_008F14_IMG_DATA_FORMAT_32_32;
break;
}
out_unknown:
@ -474,7 +482,8 @@ static bool radv_is_sampler_format_supported(VkFormat format, bool *linear_sampl
{
const struct vk_format_description *desc = vk_format_description(format);
uint32_t num_format;
if (!desc || format == VK_FORMAT_UNDEFINED)
if (!desc || format == VK_FORMAT_UNDEFINED ||
format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT)
return false;
num_format = radv_translate_tex_numformat(format, desc,
vk_format_get_first_non_void_channel(format));
@ -685,10 +694,12 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
}
if (radv_is_buffer_format_supported(format, &scaled)) {
buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
if (!scaled)
buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT) {
buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
if (!scaled)
buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
}
buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
}
if (vk_format_is_depth_or_stencil(format)) {
@ -758,7 +769,9 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
if (format == VK_FORMAT_R32_UINT ||
format == VK_FORMAT_R32_SINT ||
format == VK_FORMAT_R32_SFLOAT) {
format == VK_FORMAT_R32_SFLOAT ||
format == VK_FORMAT_R64_UINT ||
format == VK_FORMAT_R64_SINT) {
buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;

View File

@ -532,6 +532,35 @@ static unsigned radv_map_swizzle(unsigned swizzle)
}
}
/*
 * Fill swizzle[0..3] with the component swizzle to use for a descriptor of
 * the format described by desc.
 *
 * desc:    format description; its format, colorspace and swizzle fields are
 *          read.
 * mapping: component mapping to compose with the format swizzle, or NULL to
 *          take the format's own swizzle unchanged.
 * swizzle: output array of four entries.
 */
static void
radv_compose_swizzle(const struct vk_format_description *desc,
		     const VkComponentMapping *mapping, enum vk_swizzle swizzle[4])
{
	const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
	const unsigned char *base_swizzle;

	if (desc->format == VK_FORMAT_R64_UINT || desc->format == VK_FORMAT_R64_SINT) {
		/* 64-bit formats only support storage images, and storage
		 * images require identity component mappings, so the mapping
		 * is not consulted here. 64-bit images are accessed with
		 * 32-bit instructions, hence the fixed X,Y layout.
		 *
		 * The zw components are 1,0 so that they can easily be used
		 * by loads to create the w component, which has to be 0 for
		 * NULL descriptors.
		 */
		swizzle[0] = VK_SWIZZLE_X;
		swizzle[1] = VK_SWIZZLE_Y;
		swizzle[2] = VK_SWIZZLE_1;
		swizzle[3] = VK_SWIZZLE_0;
		return;
	}

	if (!mapping) {
		/* No mapping supplied: copy the format swizzle as-is. */
		for (unsigned chan = 0; chan < 4; chan++)
			swizzle[chan] = desc->swizzle[chan];
		return;
	}

	/* Depth/stencil formats compose against an all-zero base swizzle
	 * instead of the format's own swizzle.
	 */
	base_swizzle = desc->colorspace == VK_FORMAT_COLORSPACE_ZS ? swizzle_xxxx
								    : desc->swizzle;
	vk_format_compose_swizzles(mapping, base_swizzle, swizzle);
}
static void
radv_make_buffer_descriptor(struct radv_device *device,
struct radv_buffer *buffer,
@ -546,10 +575,13 @@ radv_make_buffer_descriptor(struct radv_device *device,
uint64_t va = gpu_address + buffer->offset;
unsigned num_format, data_format;
int first_non_void;
enum vk_swizzle swizzle[4];
desc = vk_format_description(vk_format);
first_non_void = vk_format_get_first_non_void_channel(vk_format);
stride = desc->block.bits / 8;
radv_compose_swizzle(desc, NULL, swizzle);
va += offset;
state[0] = va;
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
@ -560,10 +592,10 @@ radv_make_buffer_descriptor(struct radv_device *device,
}
state[2] = range;
state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
if (device->physical_device->rad_info.chip_class >= GFX10) {
const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
@ -798,12 +830,7 @@ gfx10_make_texture_descriptor(struct radv_device *device,
desc = vk_format_description(vk_format);
img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
} else {
vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
}
radv_compose_swizzle(desc, mapping, swizzle);
type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
@ -924,12 +951,7 @@ si_make_texture_descriptor(struct radv_device *device,
desc = vk_format_description(vk_format);
if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
} else {
vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
}
radv_compose_swizzle(desc, mapping, swizzle);
first_non_void = vk_format_get_first_non_void_channel(vk_format);

View File

@ -429,6 +429,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
.float32_atomic_add = true,
.float64 = true,
.geometry_streams = true,
.image_atomic_int64 = true,
.image_ms_array = true,
.image_read_without_format = true,
.image_write_without_format = true,